(I apologize in advance for the messy linebreaks this will no doubt have.
I haven't yet figured out Netscape's mail-compose window.)

The attached diff does the following:
        1.  It adds a "string_area" to the flist structure.  This is a linked
        list of big blocks of memory; when the flist is making files and
        strdup()'ing lots of names, it instead gets space out of the areas.
        On some simple tests (rsync -an ../mod_ssl /tmp) it lowered
        send_file_list from 50% to 1% of the profile time.

        2.  It adds an flist_new() routine to create a new flist, and replaces
        the two places (that I found :) that had inline code to use it.

        3.  It replaces a couple of memset() calls with structure-assignment
        to more properly "null-out" all fields. On many systems it can be
        more efficient, since the compiler is more likely to inline it.

I did some tests and the changes seem okay.  I would appreciate other tests,
particularly from those who run rsync servers.

I've been thinking about the "put it in a database" issue.  The issues aren't
trivial.  Putting the checksum there is obvious. How do you know if
it's out of date?  Add a timestamp. This means you have to stat() the file.
At that point, everything else is pulled out of the stat structure, so it's
probably not worth complicating the database file.  So the DB becomes
a mapping from filename to {time_t, md4_sum} entries.

To get more complicated, you could store different data for directories,
such as {timestamp, dir-entries-list}. This could save a set of
{open,read,close}dir calls for every directory. That doesn't seem like
much of a gain, especially
since the work to serialize the directory data to and from the DB will
be a pain. And you can only put entry names there, since changes to
metadata (e.g., chmod) don't update the directory timestamp.

So, I think putting in some calls to DB in file_checksum() might be worth it.
If the attached diff is useful and gets accepted, then I might do that.
(Unlike this diff, however, it'd be a compile-time option, so the changes
might be more involved, involving config.in et al)

Comments?
        /r$
Index: flist.c
===================================================================
RCS file: /cvsroot/rsync/flist.c,v
retrieving revision 1.84
diff -c -r1.84 flist.c
*** flist.c     2000/03/21 04:06:04     1.84
--- flist.c     2000/06/12 19:44:06
***************
*** 51,58 ****
--- 51,110 ----
  
  static struct exclude_struct **local_exclude_list;
  
+ static struct file_struct null_file;
+ 
  static void clean_flist(struct file_list *flist, int strip_root);
  
+ /* Create one string area of `size' bytes (ARENA_SIZE when size <= 0). */
+ struct string_area *string_area_new(int size)
+ {
+       int nbytes = size > 0 ? size : ARENA_SIZE;
+       struct string_area *area = malloc(sizeof *area);
+ 
+       if (area == NULL) out_of_memory("string_area_new");
+       area->base = malloc(nbytes);
+       if (area->base == NULL) out_of_memory("string_area_new buffer");
+       area->current = area->base;     /* nothing handed out yet */
+       area->end = area->base + nbytes;
+       area->next = NULL;
+       return area;
+ }
+ 
+ void string_area_free(struct string_area *a)
+ {
+       while (a) {     /* release each area: its buffer and its header */
+               struct string_area *next = a->next;
+               free(a->base);
+               free(a);        /* the header struct used to be leaked */
+               a = next;
+       }
+ }
+ 
+ /* Hand out `size' bytes from the area list *ap, adding an area if needed. */
+ char *string_area_malloc(struct string_area **ap, int size)
+ {
+       char *p;
+       struct string_area *a = *ap;
+ 
+       /* test against the bytes left (an exact fit is fine); forming
+        * a->current + size could point beyond the buffer: undefined */
+       if (size > a->end - a->current) {
+               /* no room: a new area, big enough, goes on the list front */
+               a = string_area_new(size > ARENA_SIZE ? size : ARENA_SIZE);
+               a->next = *ap;
+               *ap = a;
+       }
+       /* have space; do the "allocation." */
+       p = a->current;
+       a->current += size;
+       return p;
+ }
+ 
+ char *string_area_strdup(struct string_area **ap, const char *src)
+ {
+       size_t nbytes = strlen(src) + 1;        /* the text plus its NUL */
+       return memcpy(string_area_malloc(ap, nbytes), src, nbytes);
+ }
  
  static void list_file_entry(struct file_struct *f)
  {
***************
*** 413,420 ****
        return (st2.st_dev != filesystem_dev);
  }
  
  /* create a file_struct for a named file */
! struct file_struct *make_file(int f, char *fname)
  {
        struct file_struct *file;
        STRUCT_STAT st;
--- 465,475 ----
        return (st2.st_dev != filesystem_dev);
  }
  
+ /* use the string area when one exists (ap and *ap both set), else the heap */
+ #define STRDUP(ap, p) ((ap) && *(ap) ? string_area_strdup((ap), (p)) : strdup(p))
+ #define MALLOC(ap, i) ((ap) && *(ap) ? string_area_malloc((ap), (i)) : malloc(i))
  /* create a file_struct for a named file */
! struct file_struct *make_file(int f, char *fname, struct string_area **ap)
  {
        struct file_struct *file;
        STRUCT_STAT st;
***************
*** 468,481 ****
                if (lastdir && strcmp(fname, lastdir)==0) {
                        file->dirname = lastdir;
                } else {
!                       file->dirname = strdup(fname);
                        lastdir = file->dirname;
                }
!               file->basename = strdup(p+1);
                *p = '/';
        } else {
                file->dirname = NULL;
!               file->basename = strdup(fname);
        }
  
        file->modtime = st.st_mtime;
--- 523,536 ----
                if (lastdir && strcmp(fname, lastdir)==0) {
                        file->dirname = lastdir;
                } else {
!                       file->dirname = STRDUP(ap, fname);
                        lastdir = file->dirname;
                }
!               file->basename = STRDUP(ap, p+1);
                *p = '/';
        } else {
                file->dirname = NULL;
!               file->basename = STRDUP(ap, fname);
        }
  
        file->modtime = st.st_mtime;
***************
*** 491,502 ****
  
  #if SUPPORT_LINKS
        if (S_ISLNK(st.st_mode)) {
!               file->link = strdup(linkbuf);
        }
  #endif
  
        if (always_checksum) {
!               file->sum = (char *)malloc(MD4_SUM_LENGTH);
                if (!file->sum) out_of_memory("md4 sum");
                /* drat. we have to provide a null checksum for non-regular
                   files in order to be compatible with earlier versions
--- 546,557 ----
  
  #if SUPPORT_LINKS
        if (S_ISLNK(st.st_mode)) {
!               file->link = STRDUP(ap, linkbuf);
        }
  #endif
  
        if (always_checksum) {
!               file->sum = (char *)MALLOC(ap, MD4_SUM_LENGTH);
                if (!file->sum) out_of_memory("md4 sum");
                /* drat. we have to provide a null checksum for non-regular
                   files in order to be compatible with earlier versions
***************
*** 513,519 ****
                if (lastdir && strcmp(lastdir, flist_dir)==0) {
                        file->basedir = lastdir;
                } else {
!                       file->basedir = strdup(flist_dir);
                        lastdir = file->basedir;
                }
        } else {
--- 568,574 ----
                if (lastdir && strcmp(lastdir, flist_dir)==0) {
                        file->basedir = lastdir;
                } else {
!                       file->basedir = STRDUP(ap, flist_dir);
                        lastdir = file->basedir;
                }
        } else {
***************
*** 533,539 ****
  {
    struct file_struct *file;
  
!   file = make_file(f,fname);
  
    if (!file) return;  
    
--- 588,594 ----
  {
    struct file_struct *file;
  
!   file = make_file(f,fname, &flist->string_area);
  
    if (!file) return;  
    
***************
*** 623,629 ****
  }
  
  
- 
  struct file_list *send_file_list(int f,int argc,char *argv[])
  {
        int i,l;
--- 678,683 ----
***************
*** 639,653 ****
        }
  
        start_write = stats.total_written;
- 
-       flist = (struct file_list *)malloc(sizeof(flist[0]));
-       if (!flist) out_of_memory("send_file_list");
  
!       flist->count=0;
!       flist->malloced = 1000;
!       flist->files = (struct file_struct **)malloc(sizeof(flist->files[0])*
!                                                    flist->malloced);
!       if (!flist->files) out_of_memory("send_file_list");
  
        if (f != -1) {
                io_start_buffering(f);
--- 693,700 ----
        }
  
        start_write = stats.total_written;
  
!       flist = flist_new();
  
        if (f != -1) {
                io_start_buffering(f);
***************
*** 935,956 ****
        if (file->basename) free(file->basename);
        if (file->link) free(file->link);
        if (file->sum) free(file->sum);
!       memset((char *)file, 0, sizeof(*file));
  }
  
  
  /*
   * free up all elements in a flist
   */
  void flist_free(struct file_list *flist)
  {
        int i;
        for (i=1;i<flist->count;i++) {
!               free_file(flist->files[i]);
                free(flist->files[i]);
        }       
        memset((char *)flist->files, 0, sizeof(flist->files[0])*flist->count);
        free(flist->files);
        memset((char *)flist, 0, sizeof(*flist));
        free(flist);
  }
--- 982,1028 ----
        if (file->basename) free(file->basename);
        if (file->link) free(file->link);
        if (file->sum) free(file->sum);
!       *file = null_file;
  }
  
  
  /*
+  * allocate a new file list
+  */
+ struct file_list *flist_new()
+ {
+       struct file_list *flist = malloc(sizeof *flist);
+ 
+       /* report failures under this routine's own name, not a caller's */
+       if (!flist) out_of_memory("flist_new");
+       flist->count = 0;
+       flist->malloced = 1000;         /* initial capacity of files[] */
+       flist->files = malloc(sizeof flist->files[0] * flist->malloced);
+       if (!flist->files) out_of_memory("flist_new files");
+ 
+       /* names come from a string area unless ARENA_SIZE turns that off */
+ #if ARENA_SIZE > 0
+       flist->string_area = string_area_new(0);
+ #else
+       flist->string_area = NULL;
+ #endif
+       return flist;
+ }
+ /*
   * free up all elements in a flist
   */
  void flist_free(struct file_list *flist)
  {
        int i;
        for (i=1;i<flist->count;i++) {
!               if (!flist->string_area)
!                       free_file(flist->files[i]);
                free(flist->files[i]);
        }       
        memset((char *)flist->files, 0, sizeof(flist->files[0])*flist->count);
        free(flist->files);
+       if (flist->string_area)
+               string_area_free(flist->string_area);
        memset((char *)flist, 0, sizeof(*flist));
        free(flist);
  }
***************
*** 979,985 ****
                        if (verbose > 1 && !am_server)
                                rprintf(FINFO,"removing duplicate name %s from file 
list %d\n",
                                        f_name(flist->files[i-1]),i-1);
!                       free_file(flist->files[i]);
                } 
        }
  
--- 1051,1063 ----
                        if (verbose > 1 && !am_server)
                                rprintf(FINFO,"removing duplicate name %s from file 
list %d\n",
                                        f_name(flist->files[i-1]),i-1);
!                       /* it's not great that the flist knows the semantics of the
!                        * file memory usage, but i'd rather not add a flag byte
!                        * to that struct. XXX can i use a bit in the flags field? */
!                       if (flist->string_area)
!                               flist->files[i][0] = null_file;
!                       else
!                               free_file(flist->files[i]);
                } 
        }
  
Index: rsync.h
===================================================================
RCS file: /cvsroot/rsync/rsync.h,v
retrieving revision 1.92
diff -c -r1.92 rsync.h
*** rsync.h     2000/03/30 14:15:00     1.92
--- rsync.h     2000/06/12 19:44:06
***************
*** 305,314 ****
--- 305,324 ----
  };
  
  
+ #define ARENA_SIZE    (32 * 1024)
+ 
+ struct string_area {
+       char *base;                     /* start of the malloc'ed buffer */
+       char *end;                      /* base plus the buffer size */
+       char *current;                  /* next byte to hand out */
+       struct string_area *next;       /* following area in the list, or 0 */
+ };
+ 
  struct file_list {
        int count;
        int malloced;
        struct file_struct **files;
+       struct string_area *string_area;        /* head of the name-storage areas, or 0 */
  };
  
  struct sum_buf {
Index: proto.h
===================================================================
RCS file: /cvsroot/rsync/proto.h,v
retrieving revision 1.119
diff -c -r1.119 proto.h
*** proto.h     2000/04/09 02:36:07     1.119
--- proto.h     2000/06/12 19:44:07
***************
*** 39,47 ****
  struct map_struct *map_file(int fd,OFF_T len);
  char *map_ptr(struct map_struct *map,OFF_T offset,int len);
  void unmap_file(struct map_struct *map);
  int readlink_stat(const char *Path, STRUCT_STAT *Buffer, char *Linkbuf) ;
  int link_stat(const char *Path, STRUCT_STAT *Buffer) ;
! struct file_struct *make_file(int f, char *fname);
  void send_file_name(int f,struct file_list *flist,char *fname,
                           int recursive, unsigned base_flags);
  struct file_list *send_file_list(int f,int argc,char *argv[]);
--- 39,51 ----
  struct map_struct *map_file(int fd,OFF_T len);
  char *map_ptr(struct map_struct *map,OFF_T offset,int len);
  void unmap_file(struct map_struct *map);
+ struct string_area *string_area_new(int size);
+ void string_area_free(struct string_area *a);
+ char *string_area_malloc(struct string_area **ap, int size);
+ char *string_area_strdup(struct string_area **ap, const char *src);
  int readlink_stat(const char *Path, STRUCT_STAT *Buffer, char *Linkbuf) ;
  int link_stat(const char *Path, STRUCT_STAT *Buffer) ;
! struct file_struct *make_file(int f, char *fname, struct string_area **ap);
  void send_file_name(int f,struct file_list *flist,char *fname,
                           int recursive, unsigned base_flags);
  struct file_list *send_file_list(int f,int argc,char *argv[]);
***************
*** 49,54 ****
--- 53,59 ----
  int file_compare(struct file_struct **f1,struct file_struct **f2);
  int flist_find(struct file_list *flist,struct file_struct *f);
  void free_file(struct file_struct *file);
+ struct file_list *flist_new();
  void flist_free(struct file_list *flist);
  char *f_name(struct file_struct *f);
  void recv_generator(char *fname,struct file_list *flist,int i,int f_out);
Index: backup.c
===================================================================
RCS file: /cvsroot/rsync/backup.c,v
retrieving revision 1.4
diff -c -r1.4 backup.c
*** backup.c    2000/01/30 00:56:43     1.4
--- backup.c    2000/06/12 19:44:07
***************
*** 197,203 ****
        if (do_stat (fname, &st)) return 1;
  #endif
  
!       file = make_file (0, fname);
  
          /* make a complete pathname for backup file */
          if (strlen(backup_dir) + strlen(fname) > (MAXPATHLEN - 1)) {
--- 197,203 ----
        if (do_stat (fname, &st)) return 1;
  #endif
  
!       file = make_file (0, fname, 0);
  
          /* make a complete pathname for backup file */
          if (strlen(backup_dir) + strlen(fname) > (MAXPATHLEN - 1)) {

Reply via email to