On  5 Jan 2005, Rusty Russell <[EMAIL PROTECTED]> wrote:
> On Tue, 2005-01-04 at 18:24 +0100, Robert Lemmen wrote:
> > hi rusty,
> > 
> > i read on some webpage about rsync and debian that you wrote a patch to
> > rsync that lets it use heuristics when deciding which local file to
> > use. could you tell me whether this is planned to be included in an rsync
> > release? could i have that patch?
> 
> Hmm, good question.  This is from 2.5.4, and I can't remember how well it
> worked.  Good luck!

I'm not the rsync maintainer anymore, but I think it would be cool if
this were merged, if the current team feels OK about it.


> 
> Rusty.
> 
> diff -urN rsync-2.5.4/Makefile.in rsync-2.5.4-fuzzy/Makefile.in
> --- rsync-2.5.4/Makefile.in   2002-02-26 05:48:25.000000000 +1100
> +++ rsync-2.5.4-fuzzy/Makefile.in     2002-04-03 16:35:55.000000000 +1000
> @@ -28,7 +28,7 @@
>  ZLIBOBJ=zlib/deflate.o zlib/infblock.o zlib/infcodes.o zlib/inffast.o \
>       zlib/inflate.o zlib/inftrees.o zlib/infutil.o zlib/trees.o \
>       zlib/zutil.o zlib/adler32.o 
> -OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o 
> main.o checksum.o match.o syscall.o log.o backup.o
> +OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o 
> main.o checksum.o match.o syscall.o log.o backup.o alternate.o
>  OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o 
> fileio.o batch.o \
>       clientname.o
>  DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o 
> authenticate.o
> diff -urN rsync-2.5.4/alternate.c rsync-2.5.4-fuzzy/alternate.c
> --- rsync-2.5.4/alternate.c   1970-01-01 10:00:00.000000000 +1000
> +++ rsync-2.5.4-fuzzy/alternate.c     2002-04-03 17:04:15.000000000 +1000
> @@ -0,0 +1,117 @@
> +#include "rsync.h"
> +
> +extern char *compare_dest;
> +extern int verbose;
> +
> +/* Alternate methods for opening files, if local doesn't exist */
> +/* Sanity check that we are about to open regular file */
> +int do_open_regular(char *fname)
> +{
> +     STRUCT_STAT st;
> +
> +     if (do_stat(fname, &st) == 0 && S_ISREG(st.st_mode))
> +             return do_open(fname, O_RDONLY, 0);
> +
> +     return -1;
> +}
> +
> +static void split_names(char *fname, char **dirname, char **basename)
> +{
> +     char *slash;
> +
> +     slash = strrchr(fname, '/');
> +     if (slash) {
> +             *dirname = fname;
> +             *slash = '\0';
> +             *basename = slash+1;
> +     } else {
> +             *basename = fname;
> +             *dirname = ".";
> +     }
> +}
> +
> +static unsigned int measure_name(const char *name,
> +                              const char *basename,
> +                              const char *ext)
> +{
> +     int namelen = strlen(name);
> +     int extlen = strlen(ext);
> +     unsigned int score = 0;
> +
> +     /* Extensions must match */
> +     if (namelen <= extlen || strcmp(name+namelen-extlen, ext) != 0)
> +             return 0;
> +
> +     /* Now score depends on similarity of prefix */
> +     for (; *name==*basename && *name; name++, basename++)
> +             score++;
> +     return score;
> +}
> +
> +int open_alternate_base_fuzzy(const char *fname)
> +{
> +     DIR *d;
> +     struct dirent *di;
> +     char *basename, *dirname;
> +     char mangled_name[MAXPATHLEN];
> +     char bestname[MAXPATHLEN];
> +     unsigned int bestscore = 0;
> +     const char *ext;
> +
> +     /* FIXME: can we assume fname fits here? */
> +     strcpy(mangled_name, fname);
> +
> +     split_names(mangled_name, &dirname, &basename);
> +     d = opendir(dirname);
> +     if (!d) {
> +             rprintf(FERROR,"recv_generator opendir(%s): %s\n",
> +                     dirname,strerror(errno));
> +             return -1;
> +     }
> +
> +     /* Get final extension, eg. .gz; never full basename though. */
> +     ext = strrchr(basename + 1, '.');
> +     if (!ext)
> +             ext = basename + strlen(basename); /* ext = "" */
> +
> +     while ((di = readdir(d)) != NULL) {
> +             const char *dname = d_name(di);
> +             unsigned int score;
> +
> +             if (strcmp(dname,".")==0 ||
> +                 strcmp(dname,"..")==0)
> +                     continue;
> +             
> +             score = measure_name(dname, basename, ext);
> +             if (verbose > 4)
> +                     rprintf(FINFO,"fuzzy score for %s = %u\n",
> +                             dname, score);
> +             if (score > bestscore) {
> +                     strcpy(bestname, dname); 
> +                     bestscore = score;
> +             }
> +     }
> +     closedir(d);
> +
> +     /* Found a candidate. */
> +     if (bestscore != 0) {
> +             char fuzzyname[MAXPATHLEN];
> +
> +             snprintf(fuzzyname,MAXPATHLEN,"%s/%s", dirname, bestname);
> +             if (verbose > 2)
> +                     rprintf(FINFO,"fuzzy match %s->%s\n",
> +                             fname, fuzzyname);
> +             return do_open_regular(fuzzyname);
> +     }
> +     return -1;
> +}
> +
> +int open_alternate_base_comparedir(const char *fname)
> +{
> +     char fnamebuf[MAXPATHLEN];
> +     /* try the file at compare_dest instead */
> +     snprintf(fnamebuf,MAXPATHLEN,"%s/%s",compare_dest,fname);
> +
> +     /* FIXME: now follows symlinks... */
> +     return do_open_regular(fnamebuf);
> +}
> diff -urN rsync-2.5.4/generator.c rsync-2.5.4-fuzzy/generator.c
> --- rsync-2.5.4/generator.c   2002-02-08 03:36:12.000000000 +1100
> +++ rsync-2.5.4-fuzzy/generator.c     2002-04-03 17:00:06.000000000 +1000
> @@ -42,11 +42,12 @@
>  extern int always_checksum;
>  extern int modify_window;
>  extern char *compare_dest;
> +extern int fuzzy;
>  
>  
>  /* choose whether to skip a particular file */
>  static int skip_file(char *fname,
> -                  struct file_struct *file, STRUCT_STAT *st)
> +                  struct file_struct *file, const STRUCT_STAT *st)
>  {
>       if (st->st_size != file->length) {
>               return 0;
> @@ -185,7 +186,61 @@
>       return s;
>  }
>  
> +/* Returns -1 for can't open (null file), -2 for skip */
> +static int open_base_file(struct file_struct *file,
> +                       char *fname, 
> +                       int statret, 
> +                       STRUCT_STAT *st)
> +{
> +     int fd = -1;
> +
> +     if (statret == 0) {
> +             if (S_ISREG(st->st_mode)) {
> +                     if (update_only
> +                         && cmp_modtime(st->st_mtime, file->modtime) > 0) {
> +                             if (verbose > 1)
> +                                     rprintf(FINFO,"%s is newer\n",fname);
> +                             return -2;
> +                     }
> +                     if (skip_file(fname, file, st)) {
> +                             set_perms(fname, file, st, 1);
> +                             return -2;
> +                     }
> +                     fd = do_open(fname, O_RDONLY, 0);
> +                     if (fd == -1) {
> +                             rprintf(FERROR,"failed to open %s, continuing : 
> %s\n",fname,strerror(errno));
> +                             return -1;
> +                     } else
> +                             return fd;
> +             } else {
> +                     /* Try to use symlink contents */
> +                     if (S_ISLNK(st->st_mode)) {
> +                             fd = do_open_regular(fname);
> +                             /* Don't delete yet; receiver will need it */
> +                     } else {
> +                             if (delete_file(fname) != 0) {
> +                                     if (fd != -1)
> +                                             close(fd);
> +                                     return -2;
> +                             }
> +                     }
> +             }
> +     }
> +
> +     if (fd == -1 && compare_dest != NULL)
> +             fd = open_alternate_base_comparedir(fname);
>  
> +     if (fd == -1 && fuzzy)
> +             fd = open_alternate_base_fuzzy(fname);
> +
> +     /* Update stat to understand size */
> +     if (fd != -1) {
> +             if (do_fstat(fd, st) != 0)
> +                     rprintf(FERROR,"fstat %s : %s\n",fname,strerror(errno));
> +     }
> +
> +     return fd;
> +}
>  
>  /*
>   * Acts on file number I from FLIST, whose name is fname.
> @@ -203,9 +258,6 @@
>       struct sum_struct *s;
>       int statret;
>       struct file_struct *file = flist->files[i];
> -     char *fnamecmp;
> -     char fnamecmpbuf[MAXPATHLEN];
> -     extern char *compare_dest;
>       extern int list_only;
>       extern int preserve_perms;
>       extern int only_existing;
> @@ -341,82 +393,29 @@
>               return;
>       }
>  
> -     fnamecmp = fname;
> -
> -     if ((statret == -1) && (compare_dest != NULL)) {
> -             /* try the file at compare_dest instead */
> -             int saveerrno = errno;
> -             snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",compare_dest,fname);
> -             statret = link_stat(fnamecmpbuf,&st);
> -             if (!S_ISREG(st.st_mode))
> -                     statret = -1;
> -             if (statret == -1)
> -                     errno = saveerrno;
> -             else
> -                     fnamecmp = fnamecmpbuf;
> -     }
> -
> -     if (statret == -1) {
> -             if (errno == ENOENT) {
> -                     write_int(f_out,i);
> -                     if (!dry_run) send_sums(NULL,f_out);
> -             } else {
> -                     if (verbose > 1)
> -                             rprintf(FERROR, RSYNC_NAME
> -                                     ": recv_generator failed to open 
> \"%s\": %s\n",
> -                                     fname, strerror(errno));
> -             }
> -             return;
> -     }
> -
> -     if (!S_ISREG(st.st_mode)) {
> -             if (delete_file(fname) != 0) {
> -                     return;
> -             }
> -
> -             /* now pretend the file didn't exist */
> -             write_int(f_out,i);
> -             if (!dry_run) send_sums(NULL,f_out);    
> -             return;
> -     }
> -
> -     if (opt_ignore_existing && fnamecmp == fname) { 
> -             if (verbose > 1)
> -                     rprintf(FINFO,"%s exists\n",fname);
> -             return;
> -     } 
> -
> -     if (update_only && cmp_modtime(st.st_mtime,file->modtime)>0 && fnamecmp 
> == fname) {
> +     /* Failed to stat for some other reason. */
> +     if (statret == -1 && errno != ENOENT) {
>               if (verbose > 1)
> -                     rprintf(FINFO,"%s is newer\n",fname);
> +                     rprintf(FERROR, RSYNC_NAME
> +                             ": recv_generator failed to open \"%s\": %s\n",
> +                             fname, strerror(errno));
>               return;
>       }
>  
> -     if (skip_file(fname, file, &st)) {
> -             if (fnamecmp == fname)
> -                     set_perms(fname,file,&st,1);
> -             return;
> -     }
> -
> -     if (dry_run) {
> -             write_int(f_out,i);
> +     fd = open_base_file(file, fname, statret, &st);
> +     if (fd == -2)
>               return;
> -     }
> -
> -     if (whole_file) {
> -             write_int(f_out,i);
> -             send_sums(NULL,f_out);    
> -             return;
> -     }
> -
> -     /* open the file */  
> -     fd = do_open(fnamecmp, O_RDONLY, 0);
>  
> -     if (fd == -1) {
> -             rprintf(FERROR,RSYNC_NAME": failed to open \"%s\", continuing : 
> %s\n",fnamecmp,strerror(errno));
> -             /* pretend the file didn't exist */
> +     if ((whole_file || dry_run) && fd != -1) {
> +             close(fd);
> +             fd = -1;
> +     }
> + 
> +     if (fd == -1) {
> +             /* the file didn't exist, or we can pretend it doesn't */
>               write_int(f_out,i);
> -             send_sums(NULL,f_out);
> +             if (!dry_run)
> +                     send_sums(NULL,f_out);
>               return;
>       }
>  
> @@ -427,7 +426,7 @@
>       }
>  
>       if (verbose > 3)
> -             rprintf(FINFO,"gen mapped %s of size 
> %.0f\n",fnamecmp,(double)st.st_size);
> +             rprintf(FINFO,"gen mapped %s of size 
> %.0f\n",fname,(double)st.st_size);
>  
>       s = generate_sums(buf,st.st_size,adapt_block_size(file, block_size));
>  
> diff -urN rsync-2.5.4/options.c rsync-2.5.4-fuzzy/options.c
> --- rsync-2.5.4/options.c     2002-02-28 09:49:57.000000000 +1100
> +++ rsync-2.5.4-fuzzy/options.c       2002-04-03 16:43:54.000000000 +1000
> @@ -73,6 +73,7 @@
>  #else
>  int modify_window=0;
>  #endif
> +int fuzzy=0;
>  int blocking_io=-1;
>  
>  /** Network address family. **/
> @@ -245,6 +246,7 @@
>    rprintf(F,"     --bwlimit=KBPS          limit I/O bandwidth, KBytes per 
> second\n");
>    rprintf(F,"     --write-batch=PREFIX    write batch fileset starting with 
> PREFIX\n");
>    rprintf(F,"     --read-batch=PREFIX     read batch fileset starting with 
> PREFIX\n");
> +  rprintf(F,"     --fuzzy              use similar file as basis if it 
> does't exist\n");
>    rprintf(F," -h, --help                  show this help screen\n");
>  #ifdef INET6
>    rprintf(F," -4                          prefer IPv4\n");
> @@ -340,6 +342,7 @@
>    {"hard-links",      'H', POPT_ARG_NONE,   &preserve_hard_links},
>    {"read-batch",       0,  POPT_ARG_STRING, &batch_prefix, OPT_READ_BATCH},
>    {"write-batch",      0,  POPT_ARG_STRING, &batch_prefix, OPT_WRITE_BATCH},
> +  {"fuzzy",         0,  POPT_ARG_NONE,   &fuzzy},
>  #ifdef INET6
>    {0,                      '4', POPT_ARG_VAL,    &default_af_hint,   AF_INET 
> },
>    {0,                      '6', POPT_ARG_VAL,    &default_af_hint,   
> AF_INET6 },
> @@ -757,7 +760,9 @@
>               args[ac++] = "--compare-dest";
>               args[ac++] = compare_dest;
>       }
> -
> +     
> +     if (fuzzy && am_sender)
> +             args[ac++] = "--fuzzy";
>  
>       *argc = ac;
>  }
> diff -urN rsync-2.5.4/proto.h rsync-2.5.4-fuzzy/proto.h
> --- rsync-2.5.4/proto.h       2002-02-23 11:05:06.000000000 +1100
> +++ rsync-2.5.4-fuzzy/proto.h 2002-04-03 16:35:25.000000000 +1000
> @@ -256,3 +256,6 @@
>  int cmp_modtime(time_t file1, time_t file2);
>  int _Insure_trap_error(int a1, int a2, int a3, int a4, int a5, int a6);
>  int sys_gettimeofday(struct timeval *tv);
> +int do_open_regular(char *fname);
> +int open_alternate_base_fuzzy(const char *fname);
> +int open_alternate_base_comparedir(const char *fname);
> diff -urN rsync-2.5.4/receiver.c rsync-2.5.4-fuzzy/receiver.c
> --- rsync-2.5.4/receiver.c    2002-02-14 05:42:20.000000000 +1100
> +++ rsync-2.5.4-fuzzy/receiver.c      2002-04-03 16:46:46.000000000 +1000
> @@ -36,6 +36,7 @@
>  extern char *compare_dest;
>  extern int make_backups;
>  extern char *backup_suffix;
> +extern int fuzzy;
>  
>  static struct delete_list {
>       DEV64_T dev;
> @@ -307,8 +308,6 @@
>       char *fname;
>       char template[MAXPATHLEN];
>       char fnametmp[MAXPATHLEN];
> -     char *fnamecmp;
> -     char fnamecmpbuf[MAXPATHLEN];
>       struct map_struct *buf;
>       int i;
>       struct file_struct *file;
> @@ -366,28 +365,24 @@
>               if (verbose > 2)
>                       rprintf(FINFO,"recv_files(%s)\n",fname);
>  
> -             fnamecmp = fname;
> -
>               /* open the file */  
> -             fd1 = do_open(fnamecmp, O_RDONLY, 0);
> +             fd1 = do_open(fname, O_RDONLY, 0);
>  
> -             if ((fd1 == -1) && (compare_dest != NULL)) {
> -                     /* try the file at compare_dest instead */
> -                     snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",
> -                                             compare_dest,fname);
> -                     fnamecmp = fnamecmpbuf;
> -                     fd1 = do_open(fnamecmp, O_RDONLY, 0);
> -             }
> +             if (fd1 == -1 && compare_dest != NULL)
> +                     fd1 = open_alternate_base_comparedir(fname);
> +
> +             if (fd1 == -1 && fuzzy)
> +                     fd1 = open_alternate_base_fuzzy(fname);
>  
>               if (fd1 != -1 && do_fstat(fd1,&st) != 0) {
> -                     rprintf(FERROR,"fstat %s : 
> %s\n",fnamecmp,strerror(errno));
> +                     rprintf(FERROR,"fstat %s : %s\n",fname,strerror(errno));
>                       receive_data(f_in,NULL,-1,NULL,file->length);
>                       close(fd1);
>                       continue;
>               }
>  
>               if (fd1 != -1 && !S_ISREG(st.st_mode)) {
> -                     rprintf(FERROR,"%s : not a regular file 
> (recv_files)\n",fnamecmp);
> +                     rprintf(FERROR,"%s : not a regular file 
> (recv_files)\n",fname);
>                       receive_data(f_in,NULL,-1,NULL,file->length);
>                       close(fd1);
>                       continue;
> @@ -403,7 +398,7 @@
>               if (fd1 != -1 && st.st_size > 0) {
>                       buf = map_file(fd1,st.st_size);
>                       if (verbose > 2)
> -                             rprintf(FINFO,"recv mapped %s of size 
> %.0f\n",fnamecmp,(double)st.st_size);
> +                             rprintf(FINFO,"recv mapped %s of size 
> %.0f\n",fname,(double)st.st_size);
>               } else {
>                       buf = NULL;
>               }
> 
> -- 
> A bad analogy is like a leaky screwdriver -- Richard Braakman
-- 
Martin
-- 
To unsubscribe or change options: https://lists.samba.org/mailman/listinfo/rsync
Before posting, read: http://www.catb.org/~esr/faqs/smart-questions.html

Reply via email to