Re: [PATCH 22/28] mm: add support for non block device backed swap files

2008-02-26 Thread Peter Zijlstra

On Tue, 2008-02-26 at 13:45 +0100, Miklos Szeredi wrote:
> Starting review in the middle, because this is the part I'm most
> familiar with.
> 
> > New addres_space_operations methods are added:
> >   int swapfile(struct address_space *, int);
> 
> Separate ->swapon() and ->swapoff() methods would be so much cleaner IMO.

I'm ok with that, but its a_ops bloat, do we care about that? I guess
since it has limited instances - typically one per filesystem - there is
no issue here.

> Also is there a reason why 'struct file *' cannot be supplied to these
> functions?

No real reason here. I guess its cleaner indeed. Thanks.

> > +int swap_set_page_dirty(struct page *page)
> > +{
> > +   struct swap_info_struct *sis = page_swap_info(page);
> > +
> > +   if (sis->flags & SWP_FILE) {
> > +   const struct address_space_operations *a_ops =
> > +   sis->swap_file->f_mapping->a_ops;
> > +   int (*spd)(struct page *) = a_ops->set_page_dirty;
> > +#ifdef CONFIG_BLOCK
> > +   if (!spd)
> > +   spd = __set_page_dirty_buffers;
> > +#endif
> 
> This ifdef is not really needed.  Just require ->set_page_dirty() be
> filled in by filesystems which want swapfiles (and others too, in the
> longer term, the fallback is just historical crud).

Agreed. This is a good motivation to clean up that stuff.

> Here's an incremental patch addressing these issues and beautifying
> the new code.

Thanks, I'll fold it into the patch and update the documentation. I'll
put your creds in akpm style.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 22/28] mm: add support for non block device backed swap files

2008-02-26 Thread Miklos Szeredi
Starting review in the middle, because this is the part I'm most
familiar with.

> New addres_space_operations methods are added:
>   int swapfile(struct address_space *, int);

Separate ->swapon() and ->swapoff() methods would be so much cleaner IMO.

Also is there a reason why 'struct file *' cannot be supplied to these
functions?

[snip]

> +int swap_set_page_dirty(struct page *page)
> +{
> + struct swap_info_struct *sis = page_swap_info(page);
> +
> + if (sis->flags & SWP_FILE) {
> + const struct address_space_operations *a_ops =
> + sis->swap_file->f_mapping->a_ops;
> + int (*spd)(struct page *) = a_ops->set_page_dirty;
> +#ifdef CONFIG_BLOCK
> + if (!spd)
> + spd = __set_page_dirty_buffers;
> +#endif

This ifdef is not really needed.  Just require ->set_page_dirty() be
filled in by filesystems which want swapfiles (and others too, in the
longer term, the fallback is just historical crud).

Here's an incremental patch addressing these issues and beautifying
the new code.

Signed-off-by: Miklos Szeredi <[EMAIL PROTECTED]>

Index: linux/mm/page_io.c
===
--- linux.orig/mm/page_io.c 2008-02-26 11:15:58.0 +0100
+++ linux/mm/page_io.c  2008-02-26 13:40:55.0 +0100
@@ -106,8 +106,10 @@ int swap_writepage(struct page *page, st
}
 
if (sis->flags & SWP_FILE) {
-   ret = sis->swap_file->f_mapping->
-   a_ops->swap_out(sis->swap_file, page, wbc);
+   struct file *swap_file = sis->swap_file;
+   struct address_space *mapping = swap_file->f_mapping;
+
+   ret = mapping->a_ops->swap_out(swap_file, page, wbc);
if (!ret)
count_vm_event(PSWPOUT);
return ret;
@@ -136,12 +138,13 @@ void swap_sync_page(struct page *page)
struct swap_info_struct *sis = page_swap_info(page);
 
if (sis->flags & SWP_FILE) {
-   const struct address_space_operations *a_ops =
-   sis->swap_file->f_mapping->a_ops;
-   if (a_ops->sync_page)
-   a_ops->sync_page(page);
-   } else
+   struct address_space *mapping = sis->swap_file->f_mapping;
+
+   if (mapping->a_ops->sync_page)
+   mapping->a_ops->sync_page(page);
+   } else {
block_sync_page(page);
+   }
 }
 
 int swap_set_page_dirty(struct page *page)
@@ -149,17 +152,12 @@ int swap_set_page_dirty(struct page *pag
struct swap_info_struct *sis = page_swap_info(page);
 
if (sis->flags & SWP_FILE) {
-   const struct address_space_operations *a_ops =
-   sis->swap_file->f_mapping->a_ops;
-   int (*spd)(struct page *) = a_ops->set_page_dirty;
-#ifdef CONFIG_BLOCK
-   if (!spd)
-   spd = __set_page_dirty_buffers;
-#endif
-   return (*spd)(page);
-   }
+   struct address_space *mapping = sis->swap_file->f_mapping;
 
-   return __set_page_dirty_nobuffers(page);
+   return mapping->a_ops->set_page_dirty(page);
+   } else {
+   return __set_page_dirty_nobuffers(page);
+   }
 }
 
 int swap_readpage(struct file *file, struct page *page)
@@ -172,8 +170,10 @@ int swap_readpage(struct file *file, str
BUG_ON(PageUptodate(page));
 
if (sis->flags & SWP_FILE) {
-   ret = sis->swap_file->f_mapping->
-   a_ops->swap_in(sis->swap_file, page);
+   struct file *swap_file = sis->swap_file;
+   struct address_space *mapping = swap_file->f_mapping;
+
+   ret = mapping->a_ops->swap_in(swap_file, page);
if (!ret)
count_vm_event(PSWPIN);
return ret;
Index: linux/include/linux/fs.h
===
--- linux.orig/include/linux/fs.h   2008-02-26 11:15:58.0 +0100
+++ linux/include/linux/fs.h2008-02-26 13:29:40.0 +0100
@@ -485,7 +485,8 @@ struct address_space_operations {
/*
 * swapfile support
 */
-   int (*swapfile)(struct address_space *, int);
+   int (*swapon)(struct file *file);
+   int (*swapoff)(struct file *file);
int (*swap_out)(struct file *file, struct page *page,
struct writeback_control *wbc);
int (*swap_in)(struct file *file, struct page *page);
Index: linux/mm/swapfile.c
===
--- linux.orig/mm/swapfile.c2008-02-26 12:43:57.0 +0100
+++ linux/mm/swapfile.c 2008-02-26 13:34:57.0 +0100
@@ -1014,9 +1014,11 @@ static void destroy_swap_extents(struct 
}
 
if (sis->flags & SWP_FILE) {
+   struct file *swap_file = sis->swap

Re: [PATCH 22/28] mm: add support for non block device backed swap files

2008-02-20 Thread Peter Zijlstra

On Wed, 2008-02-20 at 08:30 -0800, Randy Dunlap wrote:
> On Wed, 20 Feb 2008 15:46:32 +0100 Peter Zijlstra wrote:

< grammar mistakes >

Thanks Randy!

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 22/28] mm: add support for non block device backed swap files

2008-02-20 Thread Randy Dunlap
On Wed, 20 Feb 2008 15:46:32 +0100 Peter Zijlstra wrote:

> Signed-off-by: Peter Zijlstra <[EMAIL PROTECTED]>
> ---
>  Documentation/filesystems/Locking |   19 +
>  Documentation/filesystems/vfs.txt |   17 
>  include/linux/buffer_head.h   |2 -
>  include/linux/fs.h|8 +
>  include/linux/swap.h  |4 ++
>  mm/page_io.c  |   52 
> ++
>  mm/swap_state.c   |4 +-
>  mm/swapfile.c |   26 ++-
>  8 files changed, 128 insertions(+), 4 deletions(-)

> Index: linux-2.6/Documentation/filesystems/Locking
> ===
> --- linux-2.6.orig/Documentation/filesystems/Locking
> +++ linux-2.6/Documentation/filesystems/Locking

> @@ -291,6 +297,19 @@ cleaned, or an error value if not. Note 
>  getting mapped back in and redirtied, it needs to be kept locked
>  across the entire operation.
>  
> + ->swapfile() will be called with a non zero argument on address spaces

   non-zero

> +backing non block device backed swapfiles. A return value of zero indicates
> +success. In which case this address space can be used for backing swapspace.

   success, in which case

> +The swapspace operations will be proxied to the address space operations.
> +Swapoff will call this method with a zero argument to release the address
> +space.
> +
> + ->swap_out() when swapfile() returned success, this method is used to
> +write the swap page.
> +
> + ->swap_in() when swapfile() returned success, this method is used to
> +read the swap page.
> +
>   Note: currently almost all instances of address_space methods are
>  using BKL for internal serialization and that's one of the worst sources
>  of contention. Normally they are calling library functions (in fs/buffer.c)

> Index: linux-2.6/Documentation/filesystems/vfs.txt
> ===
> --- linux-2.6.orig/Documentation/filesystems/vfs.txt
> +++ linux-2.6/Documentation/filesystems/vfs.txt
> @@ -728,6 +732,19 @@ struct address_space_operations {
>   prevent redirtying the page, it is kept locked during the whole
>   operation.
>  
> +  swapfile: Called with a non-zero argument when swapon is used on a file. A
> + return value of zero indicates success. In which case this

   success, in which case this

> + address_space can be used to back swapspace. The swapspace operations
> + will be proxied to this address space's ->swap_{out,in} methods.
> + Swapoff will call this method with a zero argument to release the
> + address space.
> +
> +  swap_out: Called to write a swapcache page to a backing store, similar to
> + writepage.
> +
> +  swap_in: Called to read a swapcache page from a backing store, similar to
> + readpage.
> +
>  The File Object
>  ===

---
~Randy
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 22/28] mm: add support for non block device backed swap files

2008-02-20 Thread Peter Zijlstra
New addres_space_operations methods are added:
  int swapfile(struct address_space *, int);
  int swap_out(struct file *, struct page *, struct writeback_control *);
  int swap_in(struct file *, struct page *);

When during sys_swapon() the swapfile() method is found and returns no error
the swapper_space.a_ops will proxy to sis->swap_file->f_mapping->a_ops, and
make use of swap_{out,in}() to write/read swapcache pages.

The swapfile method will be used to communicate to the address_space that the
VM relies on it, and the address_space should take adequate measures (like 
reserving memory for mempools or the like).

This new interface can be used to obviate the need for ->bmap in the swapfile
code. A filesystem would need to load (and maybe even allocate) the full block
map for a file into memory and pin it there on ->swapfile(,1) so that
->swap_{out,in}() have instant access to it. It can be released on
->swapfile(,0).

The reason to provide ->swap_{out,in}() over using {write,read}page() is to
 1) make a distinction between swapcache and pagecache pages, and
 2) to provide a struct file * for credential context (normally not needed
in the context of writepage, as the page content is normally dirtied
using either of the following interfaces:
  write_{begin,end}()
  {prepare,commit}_write()
  page_mkwrite()
which do have the file context.

Signed-off-by: Peter Zijlstra <[EMAIL PROTECTED]>
---
 Documentation/filesystems/Locking |   19 +
 Documentation/filesystems/vfs.txt |   17 
 include/linux/buffer_head.h   |2 -
 include/linux/fs.h|8 +
 include/linux/swap.h  |4 ++
 mm/page_io.c  |   52 ++
 mm/swap_state.c   |4 +-
 mm/swapfile.c |   26 ++-
 8 files changed, 128 insertions(+), 4 deletions(-)

Index: linux-2.6/include/linux/swap.h
===
--- linux-2.6.orig/include/linux/swap.h
+++ linux-2.6/include/linux/swap.h
@@ -120,6 +120,7 @@ enum {
SWP_USED= (1 << 0), /* is slot in swap_info[] used? */
SWP_WRITEOK = (1 << 1), /* ok to write to this swap?*/
SWP_ACTIVE  = (SWP_USED | SWP_WRITEOK),
+   SWP_FILE= (1 << 2), /* file swap area */
/* add others here before... */
SWP_SCANNING= (1 << 8), /* refcount in scan_swap_map */
 };
@@ -217,6 +218,8 @@ extern void swap_unplug_io_fn(struct bac
 /* linux/mm/page_io.c */
 extern int swap_readpage(struct file *, struct page *);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
+extern void swap_sync_page(struct page *page);
+extern int swap_set_page_dirty(struct page *page);
 extern void end_swap_bio_read(struct bio *bio, int err);
 
 /* linux/mm/swap_state.c */
@@ -250,6 +253,7 @@ extern unsigned int count_swap_pages(int
 extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t);
 extern sector_t swapdev_block(int, pgoff_t);
 extern struct swap_info_struct *get_swap_info_struct(unsigned);
+extern struct swap_info_struct *page_swap_info(struct page *);
 extern int can_share_swap_page(struct page *);
 extern int remove_exclusive_swap_page(struct page *);
 struct backing_dev_info;
Index: linux-2.6/mm/page_io.c
===
--- linux-2.6.orig/mm/page_io.c
+++ linux-2.6/mm/page_io.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index,
@@ -97,11 +98,21 @@ int swap_writepage(struct page *page, st
 {
struct bio *bio;
int ret = 0, rw = WRITE;
+   struct swap_info_struct *sis = page_swap_info(page);
 
if (remove_exclusive_swap_page(page)) {
unlock_page(page);
goto out;
}
+
+   if (sis->flags & SWP_FILE) {
+   ret = sis->swap_file->f_mapping->
+   a_ops->swap_out(sis->swap_file, page, wbc);
+   if (!ret)
+   count_vm_event(PSWPOUT);
+   return ret;
+   }
+
bio = get_swap_bio(GFP_NOIO, page_private(page), page,
end_swap_bio_write);
if (bio == NULL) {
@@ -120,13 +131,54 @@ out:
return ret;
 }
 
+void swap_sync_page(struct page *page)
+{
+   struct swap_info_struct *sis = page_swap_info(page);
+
+   if (sis->flags & SWP_FILE) {
+   const struct address_space_operations *a_ops =
+   sis->swap_file->f_mapping->a_ops;
+   if (a_ops->sync_page)
+   a_ops->sync_page(page);
+   } else
+   block_sync_page(page);
+}
+
+int swap_set_page_dirty(struct page *page)
+{
+   struct swap_info_struct *sis = page_swap_info(page);
+
+   if (sis->fla