Andi Kleen a écrit :


(because of the insane struct file_ra_state f_ra. I wish this structure were dynamically allocated only for files that really use it)


How about you submit a patch for that instead?
-Andi

OK, could you please comment this patch ?

The problem of dynamically allocating the readahead state data is that the 
allocation can fail and should not be fatal.
I made some choices that might be not good.

I also chose not to align "file_ra" slab on SLAB_HWCACHE_ALIGN because the object size is 10*sizeof(long), so alignment would loose 6*sizeof(long) bytes for each object.


[PATCH]

* struct file cleanup : the very large file_ra_state is now allocated only on demand, 
using a dedicated "file_ra" slab.
        64bits machines handling lot of sockets can save about 72 bytes per 
file.
* private_data : The field is moved close to f_count and f_op fields to speedup 
sockfd_lookups

Thank you

Signed-off-by: Eric Dumazet <[EMAIL PROTECTED]>



--- linux-2.6.13-rc6/include/linux/fs.h 2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/include/linux/fs.h      2005-08-18 01:33:04.000000000 
+0200
@@ -586,20 +586,19 @@
        struct dentry           *f_dentry;
        struct vfsmount         *f_vfsmnt;
        struct file_operations  *f_op;
+       void                    *private_data;
        atomic_t                f_count;
        unsigned int            f_flags;
        mode_t                  f_mode;
        loff_t                  f_pos;
        struct fown_struct      f_owner;
        unsigned int            f_uid, f_gid;
-       struct file_ra_state    f_ra;
+       struct file_ra_state    *f_rap;
 
        size_t                  f_maxcount;
        unsigned long           f_version;
        void                    *f_security;
 
-       /* needed for tty driver, and maybe others */
-       void                    *private_data;
 
 #ifdef CONFIG_EPOLL
        /* Used by fs/eventpoll.c to link all the hooks to this file */
@@ -1514,8 +1513,7 @@
 extern void do_generic_mapping_read(struct address_space *mapping,
                                    struct file_ra_state *, struct file *,
                                    loff_t *, read_descriptor_t *, 
read_actor_t);
-extern void
-file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
+extern struct file_ra_state *file_ra_state_init(struct file *);
 extern ssize_t generic_file_direct_IO(int rw, struct kiocb *iocb,
        const struct iovec *iov, loff_t offset, unsigned long nr_segs);
 extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, 
@@ -1549,8 +1547,10 @@
                                        read_descriptor_t * desc,
                                        read_actor_t actor)
 {
+       if (filp->f_rap == NULL)
+               file_ra_state_init(filp);
        do_generic_mapping_read(filp->f_mapping,
-                               &filp->f_ra,
+                               filp->f_rap,
                                filp,
                                ppos,
                                desc,
--- linux-2.6.13-rc6/include/linux/slab.h       2005-08-07 20:18:56.000000000 
+0200
+++ linux-2.6.13-rc6-ed/include/linux/slab.h    2005-08-18 00:37:59.000000000 
+0200
@@ -125,6 +125,7 @@
 extern kmem_cache_t    *names_cachep;
 extern kmem_cache_t    *files_cachep;
 extern kmem_cache_t    *filp_cachep;
+extern kmem_cache_t    *ra_cachep;
 extern kmem_cache_t    *fs_cachep;
 extern kmem_cache_t    *signal_cachep;
 extern kmem_cache_t    *sighand_cachep;
--- linux-2.6.13-rc6/mm/readahead.c     2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/mm/readahead.c  2005-08-18 01:14:11.000000000 +0200
@@ -29,14 +29,20 @@
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
 /*
- * Initialise a struct file's readahead state.  Assumes that the caller has
- * memset *ra to zero.
+ * Initialise a struct file's readahead state.
  */
-void
-file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
+struct file_ra_state *
+file_ra_state_init(struct file *file)
 {
-       ra->ra_pages = mapping->backing_dev_info->ra_pages;
-       ra->prev_page = -1;
+       struct file_ra_state *ra = kmem_cache_alloc(ra_cachep, GFP_KERNEL);
+
+       if (ra != NULL) {
+               file->f_rap = ra;
+               memset(ra, 0, sizeof(*ra));
+               ra->ra_pages = 
file->f_mapping->host->i_mapping->backing_dev_info->ra_pages;
+               ra->prev_page = -1;
+       }
+       return ra;
 }
 
 /*
--- linux-2.6.13-rc6/mm/filemap.c       2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/mm/filemap.c    2005-08-18 01:33:58.000000000 +0200
@@ -711,7 +711,7 @@
  * NULL.
  */
 void do_generic_mapping_read(struct address_space *mapping,
-                            struct file_ra_state *_ra,
+                            struct file_ra_state *_rap,
                             struct file *filp,
                             loff_t *ppos,
                             read_descriptor_t *desc,
@@ -727,8 +727,15 @@
        loff_t isize;
        struct page *cached_page;
        int error;
-       struct file_ra_state ra = *_ra;
+       struct file_ra_state ra;
 
+       if (likely(_rap != NULL))
+               ra = *_rap;
+       else {
+               memset(&ra, 0, sizeof(ra));
+               ra.prev_page = -1;
+               ra.ra_pages = default_backing_dev_info.ra_pages;
+       }
        cached_page = NULL;
        index = *ppos >> PAGE_CACHE_SHIFT;
        next_index = index;
@@ -908,7 +915,8 @@
        }
 
 out:
-       *_ra = ra;
+       if (_rap != NULL)
+               *_rap = ra;
 
        *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
        if (cached_page)
@@ -1187,12 +1195,15 @@
        int error;
        struct file *file = area->vm_file;
        struct address_space *mapping = file->f_mapping;
-       struct file_ra_state *ra = &file->f_ra;
+       struct file_ra_state *ra = file->f_rap;
        struct inode *inode = mapping->host;
        struct page *page;
        unsigned long size, pgoff;
        int did_readaround = 0, majmin = VM_FAULT_MINOR;
 
+       if (ra == NULL)
+               ra = file_ra_state_init(file);
+
        pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
 
 retry_all:
@@ -1225,14 +1236,16 @@
                        handle_ra_miss(mapping, ra, pgoff);
                        goto no_cached_page;
                }
-               ra->mmap_miss++;
+               if (ra != NULL) {
+                       ra->mmap_miss++;
 
-               /*
-                * Do we miss much more than hit in this file? If so,
-                * stop bothering with read-ahead. It will only hurt.
-                */
-               if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS)
-                       goto no_cached_page;
+                       /*
+                        * Do we miss much more than hit in this file? If so,
+                        * stop bothering with read-ahead. It will only hurt.
+                        */
+                       if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS)
+                               goto no_cached_page;
+               }
 
                /*
                 * To keep the pgmajfault counter straight, we need to
@@ -1243,7 +1256,7 @@
                        inc_page_state(pgmajfault);
                }
                did_readaround = 1;
-               ra_pages = max_sane_readahead(file->f_ra.ra_pages);
+               ra_pages = max_sane_readahead(ra != NULL ? ra->ra_pages : 
default_backing_dev_info.ra_pages);
                if (ra_pages) {
                        pgoff_t start = 0;
 
@@ -1256,7 +1269,7 @@
                        goto no_cached_page;
        }
 
-       if (!did_readaround)
+       if (!did_readaround && ra != NULL)
                ra->mmap_hit++;
 
        /*
--- linux-2.6.13-rc6/mm/fadvise.c       2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/mm/fadvise.c    2005-08-18 01:05:15.000000000 +0200
@@ -28,6 +28,7 @@
        struct file *file = fget(fd);
        struct address_space *mapping;
        struct backing_dev_info *bdi;
+       struct file_ra_state *ra;
        loff_t endbyte;
        pgoff_t start_index;
        pgoff_t end_index;
@@ -54,15 +55,20 @@
 
        bdi = mapping->backing_dev_info;
 
+       if ((ra = file->f_rap) == NULL)
+               ra = file_ra_state_init(file);
        switch (advice) {
        case POSIX_FADV_NORMAL:
-               file->f_ra.ra_pages = bdi->ra_pages;
+               if (ra != NULL)
+                       ra->ra_pages = bdi->ra_pages;
                break;
        case POSIX_FADV_RANDOM:
-               file->f_ra.ra_pages = 0;
+               if (ra != NULL)
+                       ra->ra_pages = 0;
                break;
        case POSIX_FADV_SEQUENTIAL:
-               file->f_ra.ra_pages = bdi->ra_pages * 2;
+               if (ra != NULL)
+                       ra->ra_pages = bdi->ra_pages * 2;
                break;
        case POSIX_FADV_WILLNEED:
        case POSIX_FADV_NOREUSE:
--- linux-2.6.13-rc6/fs/open.c  2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/fs/open.c       2005-08-18 01:05:15.000000000 +0200
@@ -804,7 +804,7 @@
        }
        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
-       file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
+       f->f_rap = NULL;
 
        /* NB: we're sure to have correct a_ops only after f_op->open */
        if (f->f_flags & O_DIRECT) {
--- linux-2.6.13-rc6/fs/file_table.c    2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/fs/file_table.c 2005-08-18 00:37:59.000000000 +0200
@@ -55,6 +55,8 @@
 
 static inline void file_free(struct file *f)
 {
+       if (f->f_rap != NULL)
+               kmem_cache_free(ra_cachep, f->f_rap);
        kmem_cache_free(filp_cachep, f);
 }
 
--- linux-2.6.13-rc6/fs/dcache.c        2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/fs/dcache.c     2005-08-18 02:22:56.000000000 +0200
@@ -1705,6 +1705,8 @@
 
 /* SLAB cache for file structures */
 kmem_cache_t *filp_cachep;
+/* SLAB cache for ra structures */
+kmem_cache_t *ra_cachep;
 
 EXPORT_SYMBOL(d_genocide);
 
@@ -1733,6 +1735,9 @@
        filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
                        SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor);
 
+       ra_cachep = kmem_cache_create("file_ra", sizeof(struct file_ra_state), 
0,
+                       SLAB_PANIC, NULL, NULL);
+
        dcache_init(mempages);
        inode_init(mempages);
        files_init(mempages);

Reply via email to