Re: new VFS method sync_page and stacking

2000-05-05 Thread Roman V. Shaposhnick

On Tue, May 02, 2000 at 09:09:28PM +0100, Steve Dodd wrote:
> 
> OK, I think I understand now :) Would you like a patch to change to the void *
> parameter, or will you do this yourself?

  I guess I've done it. In the attachment there is a patch that implements 
just that thing Al and others were talking about. All fs' seem to be ok.

  For me it looks ok, but maybe I've missed something? Well, if not, maybe
we can include it in pre7-6 ?

Thanks,
Roman.

P.S. Of course, it's against pre7-5.


diff -ur linux-orig/fs/adfs/inode.c linux/fs/adfs/inode.c
--- linux-orig/fs/adfs/inode.c  Wed May  3 15:47:38 2000
+++ linux/fs/adfs/inode.c   Wed May  3 18:07:03 2000
@@ -49,17 +49,17 @@
return 0;
 }
 
-static int adfs_writepage(struct file *file, struct dentry *dentry, struct page *page)
+static int adfs_writepage(void *cookie, struct page *page)
 {
return block_write_full_page(page, adfs_get_block);
 }
 
-static int adfs_readpage(struct dentry *dentry, struct page *page)
+static int adfs_readpage(void *cookie, struct page *page)
 {
return block_read_full_page(page, adfs_get_block);
 }
 
-static int adfs_prepare_write(struct file *file, struct page *page, unsigned int 
from, unsigned int to)
+static int adfs_prepare_write(void *cookie, struct page *page, unsigned int from, 
+unsigned int to)
 {
return cont_prepare_write(page, from, to, adfs_get_block,
&((struct inode *)page->mapping->host)->u.adfs_i.mmu_private);
diff -ur linux-orig/fs/affs/file.c linux/fs/affs/file.c
--- linux-orig/fs/affs/file.c   Wed May  3 15:47:35 2000
+++ linux/fs/affs/file.c    Wed May  3 18:13:53 2000
@@ -338,15 +338,15 @@
 
 }

-static int affs_writepage(struct file *file, struct dentry *dentry, struct page *page)
+static int affs_writepage(void *cookie, struct page *page)
 {
return block_write_full_page(page,affs_get_block);
 }
-static int affs_readpage(struct dentry *dentry, struct page *page)
+static int affs_readpage(void *cookie, struct page *page)
 {
return block_read_full_page(page,affs_get_block);
 }
-static int affs_prepare_write(struct file *file, struct page *page, unsigned from, 
unsigned to)
+static int affs_prepare_write(void *cookie, struct page *page, unsigned from, 
+unsigned to)
 {
return cont_prepare_write(page,from,to,affs_get_block,
&((struct inode*)page->mapping->host)->u.affs_i.mmu_private);
diff -ur linux-orig/fs/affs/symlink.c linux/fs/affs/symlink.c
--- linux-orig/fs/affs/symlink.c    Wed May  3 15:47:35 2000
+++ linux/fs/affs/symlink.c Wed May  3 18:16:04 2000
@@ -15,10 +15,10 @@
 #include 
 #include 
 
-static int affs_symlink_readpage(struct dentry *dentry, struct page *page)
+static int affs_symlink_readpage(void *cookie, struct page *page)
 {
struct buffer_head *bh;
-   struct inode *inode = dentry->d_inode;
+   struct inode *inode = (struct inode*)page->mapping->host;
char *link = (char*)kmap(page);
struct slink_front *lf;
int err;
diff -ur linux-orig/fs/bfs/file.c linux/fs/bfs/file.c
--- linux-orig/fs/bfs/file.c    Wed May  3 15:47:38 2000
+++ linux/fs/bfs/file.c Wed May  3 18:17:27 2000
@@ -127,17 +127,17 @@
return err;
 }
 
-static int bfs_writepage(struct file *file, struct dentry *dentry, struct page *page)
+static int bfs_writepage(void *cookie, struct page *page)
 {
return block_write_full_page(page, bfs_get_block);
 }
 
-static int bfs_readpage(struct dentry *dentry, struct page *page)
+static int bfs_readpage(void *cookie, struct page *page)
 {
return block_read_full_page(page, bfs_get_block);
 }
 
-static int bfs_prepare_write(struct file *file, struct page *page, unsigned from, 
unsigned to)
+static int bfs_prepare_write(void *cookie, struct page *page, unsigned from, unsigned 
+to)
 {
return block_prepare_write(page, from, to, bfs_get_block);
 }
diff -ur linux-orig/fs/buffer.c linux/fs/buffer.c
--- linux-orig/fs/buffer.c  Wed May  3 15:47:34 2000
+++ linux/fs/buffer.c   Wed May  3 19:11:57 2000
@@ -1691,7 +1691,7 @@
return err;
 }
 
-int generic_commit_write(struct file *file, struct page *page,
+int generic_commit_write(void *cookie, struct page *page,
unsigned from, unsigned to)
 {
struct inode *inode = (struct inode*)page->mapping->host;
@@ -2412,7 +2412,7 @@
return 0;
 }
 
-int block_sync_page(struct page *page)
+int block_sync_page(void *cookie, struct page *page)
 {
run_task_queue(&tq_disk);
return 0;
diff -ur linux-orig/fs/coda/symlink.c linux/fs/coda/symlink.c
--- linux-orig/fs/coda/symlink.c    Wed May  3 15:47:35 2000
+++ linux/fs/coda/symlink.c Wed May  3 18:52:06 2000
@@ -22,9 +22,9 @@
 #include 
 #include 
 
-static int coda_symlink_filler(struct dentry *dentry, struct page *page)
+static int coda_symlink_filler(void *cookie, struct page *page)
 {
-   struct inode *inode = dentry->d_inode;
+   struct inode *inode = (struct inode*)page->mapping->host;

Re: new VFS method sync_page and stacking

2000-05-02 Thread Steve Dodd

On Mon, May 01, 2000 at 11:54:20AM -0400, Alexander Viro wrote:

> Theory: different clients may want to share the cache for remote file.
> They use some authentication to sign the RPC requests. I.e. you need to
> have some token passed to the methods. struct file * is bogus here - it's
> void * and it should be ignored by almost every address_space.

OK, that sounds fine.

> generic_write_file() has every reason to expect that a-s it deals with
> accepts file as a token (if it needs the thing at all), but that's
> generic_file_write(). mm/filemap.c contains a lot of stuff that is
> completely generic _and_ some file-related pieces. Yes, it needs further
> cleanups and separation.

How about splitting the generic page cache parts of filemap.c into a
mm/page_cache.c? Maybe even move the resulting filemap.c into ../fs.

[..]
> > I disagree; at the moment the interface is not generic, but my belief is
> > that it can be made so with only small changes to existing code. However,
> > I haven't looked at all existing code yet. NFS is doing something odd, and I
> > also should look at your code if that's possible (is it available
> > somewhere?)
> 
> RPC signing...

OK, I think I understand now :) Would you like a patch to change to the void *
parameter, or will you do this yourself?

S.



Re: new VFS method sync_page and stacking

2000-05-02 Thread Ion Badulescu

In article <[EMAIL PROTECTED]> 
you wrote:

> So I'm now leaning more towards losing the struct file/dentry from the
> address_space ops.  Furthermore, since the address_space structure showed up
> relatively recently, we might consider cleaning up this API before 2.4.  I
> believe my stacking code would work fine w/o these struct file/dentry being
> passed around (Ion, can you verify this please?)

As long as the interface doesn't require a struct file *, we don't require it
either, as we don't keep any info in a file's private data (aside from the
obvious pointer to the stacked file structure).


Ion
-- 
  It is better to keep your mouth shut and be thought a fool,
than to open it and remove all doubt.



Re: new VFS method sync_page and stacking

2000-05-02 Thread Roman V. Shaposhnick

On Mon, May 01, 2000 at 02:19:24PM -0400, Alexander Viro wrote:
> >
> > 
> >   Ooops! I mean, aggregation, of course, but nevertheless I just try to 
> > express the idea using less possible number of words. 
> >   Ok, what about the idea ?
> 
> Tokens. Opaque tokens. Interface should not care about them. Callers must
> know what they are dealing with, indeed.

  Al, the only thing I worry about is stable interface. I do not care about
"struct file *" or something like that just because I hope that with reasonable
and stable interface I can do what I want ( efficiency? well, I can sacrifice 
it ;) ). So please, keep it coherent and stable. I mean, if you start adding
"struct file *" add it to the readpage in pre7. Ok ?

> > > > > Inode data pages are per-inode, not per-dentry or per-file-struct.
> > > > 
> > > >   Frankly, inode data pages are file pages, because it is userspace files we
> > > > care of. Nothing more, nothing less. 
> > > 
> > >   You've missed the point here. We cache the data on the client
> > > side. _All_ openers share that cache. 
> > 
> >You mean local, client side openers ? 
> 
> Yep.
> 
> > > IOW, we have a chance that data submitted by one of them will be sent with 
> > > credentials of another. Nothing to do here.
> > 
> >For now? Yes. But if we would be able to use "struct file *" then we could 
> > store all meta information on a per opener basis. That's the idea. And we
> > could always separate one opener from another by syncing each and every
> > operation. Yes?
> 
> All meta information? Consider mmap(). You have a dirty page. 

  Ok. Let's see how can we get this page: 
  1. client generate write(2) and generic_file_write writes to 
 the memory region. In this case we ought to generate Rwrite 
 immediately. Yes ? 
  2. client writes to the memory region ( in case of previous mmap  )
 *correct me if I am wrong* but I guess that it is up to the vm
 subsystem to request sync, isn't it ? If yes, then we need not
 generate Rwrite, just after the page turns to be dirty. Yes ?

> It might be dirtied by a process that has it mapped. It might be dirtied by 
> direct write() by another opener. You end up with the need to generate Rwrite.
> And no way to separate the data from different openers. So either you use
> a new cache for each opener (_big_ waste of memory + hell of coherency
> problems) or you end up with the described effect.

I do understand what you mean ( we can not tell what opener wrote to the 
page  ) but I can not figure out what harm this effect could do to the
system [performance]. Tell me, please.

Thanks,
Roman.



Re: new VFS method sync_page and stacking

2000-05-01 Thread Roman V. Shaposhnick

On Mon, May 01, 2000 at 01:57:23AM -0700, Hans Reiser wrote:
> "Roman V. Shaposhnick" wrote:
> :
> > 
> >  1. In UNIX everything is a file. Thus we need just one type of a
> > cache -- cache for files that can be cached.
> 
> Directories and other metadata are not necessarily implemented as files.  

  Directories are files, indeed. Read-only files. Sometimes with special
alignment rules for the reader, but they are files. I can say nothing 
about other metadata, because I was speaking about generic usecase. And other
metadata is "other". But you are right sometimes it can fit into a different
model, but it is up to an implementation to choose one. 

Thanks,
Roman.



Re: new VFS method sync_page and stacking

2000-05-01 Thread Roman V. Shaposhnick

On Mon, May 01, 2000 at 02:33:58PM -0400, Benjamin C.R. LaHaise wrote:
> On Mon, 1 May 2000, Roman V. Shaposhnick wrote:
> 
> >   I see what you mean. And I completely agree with only one exception: we
> > should not use and we should not think of address_space as a generic cache
> > till we implement the interface for address_space_operations that:
> >  
> > 1. can work with *any* type of host object
> > 2. at the same time can work with stackable or derived ( in C++
> >terminology ) host objects like file->dentry->inode->phis.
> > 3. and has a reasonable and kludge-free specification.
> > 
> >   I agree that providing such interface to the address_space will simplify 
> > things a lot, but for now I see no evidence of somebody doing this. 
> 
> If we remove the struct file *'s from the interface, it becomes quite
> trivial to do this.

  It is not hard to remove struct file *'s from the interface, but, please,
specify, what we will get instead ? 

> > > Hmm. Take ->readpage() for example. It is used to fill a page which "belongs"
> > > to an object. That object is referenced in page->mapping->host. For inode
> > > data, the host is the inode structure. When should readpage() ever need to
> > > see anything other than the object to which the pages belong? It doesn't make
> > > sense (to me). 
> > 
> >I disagree; and mainly because sometimes it is hard to tell where object 
> > "begins" and where it "ends". Complex objects  are often  derived from 
> > simple  and it is a very common practice to use the highest level available
> > for the sake of optimization. E.g. in Linux userspace file is built around 
> > dentry which in turn is built around inode etc. Can we say that file do not
> > include inode directly ? Yes. Can we say that file do not include inode at
> > all? No. 
> > 
> >  Let me show you an example. Consider, that:
> >1. you have a network-based filesystem with a state oriented protocol. 
> >2. to do something with a file you need a special pointer to it called 
> >   file_id.
> >3. before reading/writing to/from a file you should open its file_id
> >   for reading, writing or both. After you open file_id you can only
> >   read/write from/to it but you can not do anything more.
> >
> >  I guess you will agree with me that  the best place for "opened" file_id
> > is a file->private_data ? Ok. Now the question is, how can I access opened
> > file_id if all methods ( readpage, writepage, prepare_write and commit_write ) 
> > get the first argument of type inode ?
> 
> Holding the file_id in file->private_data is just plain wrong in the
> presence of delayed writes (especially on NFS).  Consider the case where a
> temporary file is opened and deleted but then used for either read/write
> or backing an mmap. Does it not make sense that when the file is finally
> close()'d/munmap()'d by the last user that the contents should be merely
> discarded?  

   I guess what you suggest is something like:

  fd = open("file", O_RDWR);
  unlink("file");
  read(fd, buff, count);
  ...
  ...
  close(fd).

If yes, then it does not make perfect sense to discard file contents because 
in any case we are *bound* to transfer it to the server side just because 
it may have different users on that side too.

> If you're tieing the ability to complete write()'s to the
> struct file * that was opened, you'll never be able to detect this case
> (which is trivial if the file_id token is placed in the inode or address
> space).  

Yes. I store a special "clean" file_id there, and if there is no better
way I can repeat damn procedure ( clone file_id, open copy, read/write, remove
copy ) but from my point of view this is definitely waste of network capacity.
NFS ( no file security ) handles those issues because it is stateless. And I
can not because it is a waste of resources to pretend being quasi-stateless.

> An address_space should be able to do everything without
> requiring a struct file.  Granted, this raises the problem that Al pointed
> out about signing RPC requests, but if a copy of the authentication token
> is made at open() time into the lower layer for the filesystem, this
> problem goes away.
> This is important because you can't have a struct file when getting calls
> from the page reclaim action in shrink_mmap.

  That's right. Did you read my previous e-mail? I mean, that interface should 
provide highest possible level that's it. If shrink_mmap can only work at some
lower level -- it's ok. But that should be an exception not the rule.

> > > Inode data pages are per-inode, not per-dentry or per-file-struct.
> > 
> >   Frankly, inode data pages are file pages, because it is userspace files we
> > care of. Nothing more, nothing less. 
> 
> No, they are not.  address_spaces are a generic way of caching pages of a
> vm object.  Files happen to fit nicely into vm objects.

  If all you have is a h

Re: new VFS method sync_page and stacking

2000-05-01 Thread Hans Reiser

"Roman V. Shaposhnick" wrote:
:
> 
>  1. In UNIX everything is a file. Thus we need just one type of a
> cache -- cache for files that can be cached.

Directories and other metadata are not necessarily implemented as files.  

Hans



Re: new VFS method sync_page and stacking

2000-05-01 Thread Alexander Viro



On Mon, 1 May 2000, Roman V. Shaposhnick wrote:

> On Mon, May 01, 2000 at 01:50:58PM -0400, Alexander Viro wrote:
> > 
> > 
> > On Mon, 1 May 2000, Roman V. Shaposhnick wrote:
> > 
> > > 2. at the same time can work with stackable or derived ( in C++
> > >terminology ) host objects like file->dentry->inode->phis.
> > 
> > These are _not_ derived in C++ sense. Sorry.
> 
>   Ooops! I mean, aggregation, of course, but nevertheless I just try to 
> express the idea using less possible number of words. 
>   Ok, what about the idea ?

Tokens. Opaque tokens. Interface should not care about them. Callers must
know what they are dealing with, indeed.

> > > > Inode data pages are per-inode, not per-dentry or per-file-struct.
> > > 
> > >   Frankly, inode data pages are file pages, because it is userspace files we
> > > care of. Nothing more, nothing less. 
> > 
> > You've missed the point here. We cache the data on the client
> > side. _All_ openers share that cache. 
> 
>You mean local, client side openers ? 

Yep.

> > IOW, we have a chance that data submitted by one of them will be sent with 
> > credentials of another. Nothing to do here.
> 
>For now? Yes. But if we would be able to use "struct file *" then we could 
> store all meta information on a per opener basis. That's the idea. And we
> could always separate one opener from another by syncing each and every
> operation. Yes?

All meta information? Consider mmap(). You have a dirty page. It might be
dirtied by a process that has it mapped. It might be dirtied by direct
write() by another opener. You end up with the need to generate Rwrite.
And no way to separate the data from different openers. So either you use
a new cache for each opener (_big_ waste of memory + hell of coherency
problems) or you end up with the described effect.




Re: new VFS method sync_page and stacking

2000-05-01 Thread Roman V. Shaposhnick

On Mon, May 01, 2000 at 01:50:58PM -0400, Alexander Viro wrote:
> 
> 
> On Mon, 1 May 2000, Roman V. Shaposhnick wrote:
> 
> > 2. at the same time can work with stackable or derived ( in C++
> >terminology ) host objects like file->dentry->inode->phis.
> 
> These are _not_ derived in C++ sense. Sorry.

  Ooops! I mean, aggregation, of course, but nevertheless I just try to 
express the idea using less possible number of words. 
  Ok, what about the idea ?

> > > Inode data pages are per-inode, not per-dentry or per-file-struct.
> > 
> >   Frankly, inode data pages are file pages, because it is userspace files we
> > care of. Nothing more, nothing less. 
> 
>   You've missed the point here. We cache the data on the client
> side. _All_ openers share that cache. 

   You mean local, client side openers ? 

> IOW, we have a chance that data submitted by one of them will be sent with 
> credentials of another. Nothing to do here.

   For now? Yes. But if we would be able to use "struct file *" then we could 
store all meta information on a per opener basis. That's the idea. And we
could always separate one opener from another by syncing each and every
operation. Yes?

Thanks,
Roman.

P.S. I hope we are talking about same thing ? If not, example please.



Re: new VFS method sync_page and stacking

2000-05-01 Thread Benjamin C.R. LaHaise

On Mon, 1 May 2000, Roman V. Shaposhnick wrote:

>   I see what you mean. And I completely agree with only one exception: we
> should not use and we should not think of address_space as a generic cache
> till we implement the interface for address_space_operations that:
>  
> 1. can work with *any* type of host object
> 2. at the same time can work with stackable or derived ( in C++
>terminology ) host objects like file->dentry->inode->phis.
> 3. and has a reasonable and kludge-free specification.
> 
>   I agree that providing such interface to the address_space will simplify 
> things a lot, but for now I see no evidence of somebody doing this. 

If we remove the struct file *'s from the interface, it becomes quite
trivial to do this.

> > Hmm. Take ->readpage() for example. It is used to fill a page which "belongs"
> > to an object. That object is referenced in page->mapping->host. For inode
> > data, the host is the inode structure. When should readpage() ever need to
> > see anything other than the object to which the pages belong? It doesn't make
> > sense (to me). 
> 
>I disagree; and mainly because sometimes it is hard to tell where object 
> "begins" and where it "ends". Complex objects  are often  derived from 
> simple  and it is a very common practice to use the highest level available
> for the sake of optimization. E.g. in Linux userspace file is built around 
> dentry which in turn is built around inode etc. Can we say that file do not
> include inode directly ? Yes. Can we say that file do not include inode at
> all? No. 
> 
>  Let me show you an example. Consider, that:
>1. you have a network-based filesystem with a state oriented protocol. 
>2. to do something with a file you need a special pointer to it called 
>   file_id.
>3. before reading/writing to/from a file you should open its file_id
>   for reading, writing or both. After you open file_id you can only
> >   read/write from/to it but you can not do anything more.
>
>  I guess you will agree with me that  the best place for "opened" file_id
> is a file->private_data ? Ok. Now the question is, how can I access opened
> file_id if all methods ( readpage, writepage, prepare_write and commit_write ) 
> get the first argument of type inode ?

Holding the file_id in file->private_data is just plain wrong in the
presence of delayed writes (especially on NFS).  Consider the case where a
temporary file is opened and deleted but then used for either read/write
or backing an mmap.  Does it not make sense that when the file is finally
close()'d/munmap()'d by the last user that the contents should be merely
discarded?  If you're tieing the ability to complete write()'s to the
struct file * that was opened, you'll never be able to detect this case
(which is trivial if the file_id token is placed in the inode or address
space).  An address_space should be able to do everything without
requiring a struct file.  Granted, this raises the problem that Al pointed
out about signing RPC requests, but if a copy of the authentication token
is made at open() time into the lower layer for the filesystem, this
problem goes away.

This is important because you can't have a struct file when getting calls
from the page reclaim action in shrink_mmap.

> > Inode data pages are per-inode, not per-dentry or per-file-struct.
> 
>   Frankly, inode data pages are file pages, because it is userspace files we
> care of. Nothing more, nothing less. 

No, they are not.  address_spaces are a generic way of caching pages of a
vm object.  Files happen to fit nicely into vm objects.

-ben




Re: new VFS method sync_page and stacking

2000-05-01 Thread Alexander Viro



On Mon, 1 May 2000, Roman V. Shaposhnick wrote:

> 2. at the same time can work with stackable or derived ( in C++
>terminology ) host objects like file->dentry->inode->phis.

These are _not_ derived in C++ sense. Sorry.

> > Inode data pages are per-inode, not per-dentry or per-file-struct.
> 
>   Frankly, inode data pages are file pages, because it is userspace files we
> care of. Nothing more, nothing less. 

You've missed the point here. We cache the data on the client
side. _All_ openers share that cache. IOW, we have a chance that data
submitted by one of them will be sent with credentials of another. Nothing
to do here.




Re: new VFS method sync_page and stacking

2000-05-01 Thread Roman V. Shaposhnick

On Mon, May 01, 2000 at 01:35:52PM +0100, Steve Dodd wrote:
> On Mon, May 01, 2000 at 01:41:43AM +0400, Roman V. Shaposhnick wrote:
> > On Sun, Apr 30, 2000 at 03:28:18PM +0100, Steve Dodd wrote:
> 
> > > But an address_space is (or could be) a completely generic cache. It might
> > > never be associated with an inode, let alone a dentry or file structure.
> > 
> >   Ok, ok, hold on, it is filemap.c where all this stuff is defined, I guess
> > the name gives a hint about that it definitely is associated with some kind 
> > of file machinery. But I understand what you mean. See my comments below.
> 
> OK, more precisely, an address_space could (and IMO *should*) be a generic
> /page/ cache. Anything that's caching multiple pages of data indexed by an
> offset could use it. Then the page cache can sync pages, steal pages, etc.,
> etc., without having to know what the pages are being used for. At the moment,
> it's only used for inode data pages, but with some small changes it can be
> much more useful - and I believe this was Al's original intention.

  I see what you mean. And I completely agree with only one exception: we
should not use and we should not think of address_space as a generic cache
till we implement the interface for address_space_operations that:
 
1. can work with *any* type of host object
2. at the same time can work with stackable or derived ( in C++
   terminology ) host objects like file->dentry->inode->phis.
3. and has a reasonable and kludge-free specification.

  I agree that providing such interface to the address_space will simplify 
things a lot, but for now I see no evidence of somebody doing this. 

 As for, Al, I guess that only he could ever tell us what he is doing. But he 
keeps silence.

> > 
> > > For example, I've got some experimental NTFS code which caches all metadata
> > > in the page cache using the address_space stuff.
> [..]
> 
> >   IMHO, you are going wrong direction here. My point is that sometimes  
> > we would like to see address_space stuff as generic cache but it is not.
> 
> I disagree; at the moment the interface is not generic, but my belief is
> that it can be made so with only small changes to existing code. However,
> I haven't looked at all existing code yet. NFS is doing something odd, and I
> also should look at your code if that's possible (is it available somewhere?)
 
  No but I hope you will be able to see it in 2.5. btw,  if you would like 
to see some parts that you are interested at I can send you early pre-alpha.

> > Or being more precise it is a generic cache from the generic perspective,
> > but when you try to use generic functions from filemap.c you are bound to
> > treat this beast as a file cache.
> 
> The page cache at the moment is mostly used for caching inode data, hence
> the name "filemap.c".

   Steve, please read my previous e-mail about what I consider file-cache and 
what generic cache.

> [..]
> >   Thus my opinion is that address_space_operations should remain 
> > file-oriented ( and if there are no good contras take the first argument 
> > of "struct file *" type ). At the same time it is possible to have completely
> > different set of methods around  the same address_space stuff, but from my 
> > point of view this story has nothing in common with how an *existing* 
> > file-oriented interface should work.
> 
> Hmm. Take ->readpage() for example. It is used to fill a page which "belongs"
> to an object. That object is referenced in page->mapping->host. For inode
> data, the host is the inode structure. When should readpage() ever need to
> see anything other than the object to which the pages belong? It doesn't make
> sense (to me). 

   I disagree; and mainly because sometimes it is hard to tell where object 
"begins" and where it "ends". Complex objects  are often  derived from 
simple  and it is a very common practice to use the highest level available
for the sake of optimization. E.g. in Linux userspace file is built around 
dentry which in turn is built around inode etc. Can we say that file do not
include inode directly ? Yes. Can we say that file do not include inode at
all? No. 

 Let me show you an example. Consider, that:
   1. you have a network-based filesystem with a state oriented protocol. 
   2. to do something with a file you need a special pointer to it called 
  file_id.
   3. before reading/writing to/from a file you should open its file_id
  for reading, writing or both. After you open file_id you can only
   read/write from/to it but you can not do anything more.
   
 I guess you will agree with me that  the best place for "opened" file_id
is a file->private_data ? Ok. Now the question is, how can I access opened
file_id if all methods ( readpage, writepage, prepare_write and commit_write ) 
get the first argument of type inode ?

  Now imagine that we have an interface with all the properties described above, then 
it would be up to the each method to decide how to handle th

Re: new VFS method sync_page and stacking

2000-05-01 Thread Alexander Viro



On Mon, 1 May 2000, Steve Dodd wrote:

> On Mon, May 01, 2000 at 01:41:43AM +0400, Roman V. Shaposhnick wrote:
> > On Sun, Apr 30, 2000 at 03:28:18PM +0100, Steve Dodd wrote:
> 
> > > But an address_space is (or could be) a completely generic cache. It might
> > > never be associated with an inode, let alone a dentry or file structure.
> > 
> >   Ok, ok, hold on, it is filemap.c where all this stuff is defined, I guess
> > the name gives a hint about that it definitely is associated with some kind 
> > of file machinery. But I understand what you mean. See my comments below.
> 
> OK, more precisely, an address_space could (and IMO *should*) be a generic
> /page/ cache. Anything that's caching multiple pages of data indexed by an
> offset could use it. Then the page cache can sync pages, steal pages, etc.,
> etc., without having to know what the pages are being used for. At the moment,
> it's only used for inode data pages, but with some small changes it can be
> much more useful - and I believe this was Al's original intention.

It was and it still is. However, completely losing the damn struct file
may be tricky.

Theory: different clients may want to share the cache for remote file.
They use some authentication to sign the RPC requests. I.e. you need to
have some token passed to the methods. struct file * is bogus here - it's
void * and it should be ignored by almost every address_space.

generic_write_file() has every reason to expect that a-s it deals with
accepts file as a token (if it needs the thing at all), but that's
generic_file_write(). mm/filemap.c contains a lot of stuff that is
completely generic _and_ some file-related pieces. Yes, it needs further
cleanups and separation.

> > > For example, I've got some experimental NTFS code which caches all metadata
> > > in the page cache using the address_space stuff.
> [..]
> 
> >   IMHO, you are going wrong direction here. My point is that sometimes  
> > we would like to see address_space stuff as generic cache but it is not.
> 
> I disagree; at the moment the interface is not generic, but my belief is
> that it can be made so with only small changes to existing code. However,
> I haven't looked at all existing code yet. NFS is doing something odd, and I
> also should look at your code if that's possible (is it available somewhere?)

RPC signing...




Re: new VFS method sync_page and stacking

2000-05-01 Thread Roman V. Shaposhnick

On Sun, Apr 30, 2000 at 06:40:40PM +0100, Steve Dodd wrote:

[ ... ]

> I'd like to the see the address_space methods /lose/ the struct file /
> struct dentry pointer, but it may be there are situations which require
> it.

  And can you suggest a viable alternative that can be truly generic? 
You've told us your needs and I've told mine. Your filesystem is local and 
mine is network based. We *need* different address_space_operations, but
I feel very uncomfortable when one tries to speculate on this subject 
without reasonable proposals. For example, it costs me some time to resurrect 
struct file * as a first argument for 'writepage', and I hope I will be able
to cast that kind of spell on 'readpage' too. 

> I presume it's been added to replace explicit tq_disk runs, which will allow
> finer-grained syncing as described above, and also to ensure correctness in
> nfs and other similar filesystems.

  From my point of view, this method is like a sequence point in C
expressions. Nfs has a good description of what this method should do.
Finally, I guess we are speaking about the same thing only using different
words. 

Thanks,
Roman.



Re: new VFS method sync_page and stacking

2000-05-01 Thread Roman V. Shaposhnick

On Sun, Apr 30, 2000 at 05:58:22PM -0400, Erez Zadok wrote:
> In message <[EMAIL PROTECTED]>, "Roman V. Shaposhnick" writes:
> > On Sun, Apr 30, 2000 at 03:28:18PM +0100, Steve Dodd wrote:
> [...]
> > > But an address_space is (or could be) a completely generic cache. It
> > > might never be associated with an inode, let alone a dentry or file
> > > structure.
> 
> [...]
> >   Thus my opinion is that address_space_operations should remain 
> > file-oriented ( and if there are no good contras take the first argument 
> > of "struct file *" type ). At the same time it is possible to have completely
> > different set of methods around  the same address_space stuff, but from my 
> > point of view this story has nothing in common with how an *existing* 
> > file-oriented interface should work.
> >   
> > Thanks, 
> > Roman.
> 
> If you look at how various address_space ops are called, you'll see enough
> evidence of an attempt to make this interface both a file-based interface
> and a generic cache one (well, at least as far as I understood the code):

  What do you mean by "generic cache" ? From what I see in kernel code I have 
a feeling that this kind of evidence is caused by an abuse of the interface. 
  Why? IMHO, because:
  
 1. In UNIX everything is a file. Thus we need just one type of a 
cache -- cache for files that can be cached.
 2. File is not an atomic beast and there are four kinds of objects 
that a userspace file brings into existence:
 file->dentry->inode->[some physical entity].
 3. Different kernel subsystems work on different layers of this 
picture and every relation in it is many-to-one.
 
Thus, considering 1, 2 and 3 we can clearly see that methods of
address_space_operations should be polymorphic in the C++ sense. Depending on
what subsystem calls them, they should accept the right type of parameters or,
put another way, they should know at what level of the picture from
para 2 they are supposed to work. Do you agree with that? If yes, then
the question of how to implement that kind of polymorphism is a simple
matter of coding style. Yes?


> (1) generic_file_write (mm/filemap.c) can call ->commit_write with a normal
> non-NULL file.
> (2) block_symlink (fs/buffer.c) calls ->commit_write with NULL for the file
> arg.

   But it's the only example. And moreover it proves that 'block_symlink', 
the only function that uses address_space_operations directly,
suffers from the problem I've mentioned above. Let's see a typical use case:

 fs/ext2/namei.c:

   ext2_symlink( struct inode *, struct dentry *, const char *) {
   .
   err = block_symlink(inode, symname, l);
   .
   }

What we have here is the subsystem that works on inode->... level -- that's it.
It is not an example of generic caching but just an example of that we can not
do without polymorphic interface.

> So perhaps to satisfy the various needs, all address_space ops should be
  
  Steve, Erez, please, show me an example of where you need generic caching 
that fits into the address_space infrastructure? I can agree that we do need
some caches but they are not generic at all. For example in every network
based filesystem ( nfs, smbfs, ncpfs, mine ) there is a directory cache.
But all of them use address_space infrastructure with their own methods.
See nfs/dir.c and smbfs/cache.c for amusement. 

> passed a struct file which may be NULL; the individual f/s will have to
> check for it being NULL and deal with it.  (My stacking code already treats
> commit_write this way.)

   Don't get me wrong, but my strong conviction here is that passing NULLs
when we cannot pass a meaningful value is yet another example of a huge 
abuse of the interface. 

Thanks,
Roman.



Re: new VFS method sync_page and stacking

2000-05-01 Thread Erez Zadok

In message <[EMAIL PROTECTED]>, Steve Dodd writes:
> On Sun, Apr 30, 2000 at 04:46:37AM -0400, Erez Zadok wrote:
> 
> > Background: my stacking code for linux is minimal.  I only stack on
> > things I absolutely have to.  By "stack on" I mean that I save a
> > link/pointer to a lower-level object in the private data field of an
> > upper-level object.  I do so for struct file, inode, dentry, etc.  But I
> > do NOT stack on pages.  Doing so would complicate stacking considerably.
> > So far I was able to avoid this b/c every function that deals with pages
> > also passes a struct file/dentry to it so I can find the correct lower
> > page.
> 
> You shouldn't need to "stack on" pages anyway, I wouldn't have thought.
> For each page you can reference mapping->host, which should point to the
> hosting structure (at the moment always an inode, but this may change).
> 
> > The new method, sync_page() is only passed a struct page.  So I cannot
> > stack on it!  If I have to stack on it, I'll have to either
> > 
> > (1) complicate my stacking code considerably by stacking on pages.  This is
> > impossible for my stackable compression file system, b/c the mapping of
> > upper and lower pages is not 1-to-1.
> 
> Why can your sync_page implementation not grab the inode from mapping->host
> and then call sync_page on the underlying fs' page(s) that hold the data?

I can, and I do (at least now :-) I tried it last night and so far it seems
to work just fine.

> > (2) change the kernel so that every instance of sync_page is passed the
> > corresponding struct file.  This isn't pretty either.
> 
> I'd like to see the address_space methods /lose/ the struct file /
> struct dentry pointer, but it may be that there are situations which require
> it.

I took a closer look at my address_space ops for stacking.  We don't do
anything special with the struct file/dentry that we get.  We just pass
those along (or their lower unstacked counterparts) to other address_space
ops which require them.  We get corresponding lower pages using the
mapping->host inode.

I also agree that pages should be associated with the inode, not the
file/dentry.

So I'm now leaning more towards losing the struct file/dentry from the
address_space ops.  Furthermore, since the address_space structure showed up
relatively recently, we might consider cleaning up this API before 2.4.  I
believe my stacking code would work fine w/o these struct file/dentry being
passed around (Ion, can you verify this please?)

Thanks for the info Steve.

Erez.



Re: new VFS method sync_page and stacking

2000-05-01 Thread Steve Dodd

On Mon, May 01, 2000 at 01:41:43AM +0400, Roman V. Shaposhnick wrote:
> On Sun, Apr 30, 2000 at 03:28:18PM +0100, Steve Dodd wrote:

> > But an address_space is (or could be) a completely generic cache. It might
> > never be associated with an inode, let alone a dentry or file structure.
> 
>   Ok, ok, hold on, it is filemap.c where all this stuff is defined, I guess
> the name gives a hint that it is definitely associated with some kind 
> of file machinery. But I understand what you mean. See my comments below.

OK, more precisely, an address_space could (and IMO *should*) be a generic
/page/ cache. Anything that's caching multiple pages of data indexed by an
offset could use it. Then the page cache can sync pages, steal pages, etc.,
etc., without having to know what the pages are being used for. At the moment,
it's only used for inode data pages, but with some small changes it can be
much more useful - and I believe this was Al's original intention.

> 
> > For example, I've got some experimental NTFS code which caches all metadata
> > in the page cache using the address_space stuff.
[..]

>   IMHO, you are going wrong direction here. My point is that sometimes  
> we would like to see address_space stuff as generic cache but it is not.

I disagree; at the moment the interface is not generic, but my belief is
that it can be made so with only small changes to existing code. However,
I haven't looked at all existing code yet. NFS is doing something odd, and I
also should look at your code if that's possible (is it available somewhere?)

> Or being more precise it is a generic cache from the generic perspective,
> but when you try to use generic functions from filemap.c you are bound to
> treat this beast as a file cache.

The page cache at the moment is mostly used for caching inode data, hence
the name "filemap.c".

[..]
>   Thus my opinion is that address_space_operations should remain 
> file-oriented ( and if there are no good contras take the first argument 
> of "struct file *" type ). At the same time it is possible to have completely
> different set of methods around  the same address_space stuff, but from my 
> point of view this story has nothing in common with how an *existing* 
> file-oriented interface should work.

Hmm. Take ->readpage() for example. It is used to fill a page which "belongs"
to an object. That object is referenced in page->mapping->host. For inode
data, the host is the inode structure. When should readpage() ever need to
see anything other than the object to which the pages belong? It doesn't make
sense (to me). If you think you need to access other data, it should either
be accessible through the host object, or you have used the wrong host object.

Inode data pages are per-inode, not per-dentry or per-file-struct.




Re: new VFS method sync_page and stacking

2000-05-01 Thread Steve Dodd

On Sun, Apr 30, 2000 at 03:57:26PM -0400, Erez Zadok wrote:

> It sounds like different people have possibly conflicting needs.  I think
> any major changes should wait for 2.5.

Almost certainly, though there is an argument for cleaning up these APIs
now (before we go to 2.4) so that we don't have to change them again too
much in 2.5/6. OTOH, if it's going to cause problems for existing code (nfs,
and external modules like the stacking stuff) then we should wait.

> I would also suggest that such
> significant VFS changes be discussed on this list so we can ensure that we
> can all get what we need out of the VFS.  Thanks.

Of course :)



Re: new VFS method sync_page and stacking

2000-05-01 Thread Steve Dodd

On Sun, Apr 30, 2000 at 04:46:37AM -0400, Erez Zadok wrote:

> Background: my stacking code for linux is minimal.  I only stack on things I
> absolutely have to.  By "stack on" I mean that I save a link/pointer to a
> lower-level object in the private data field of an upper-level object.  I do
> so for struct file, inode, dentry, etc.  But I do NOT stack on pages.  Doing
> so would complicate stacking considerably.  So far I was able to avoid this
> b/c every function that deals with pages also passes a struct file/dentry to
> it so I can find the correct lower page.

You shouldn't need to "stack on" pages anyway, I wouldn't have thought.
For each page you can reference mapping->host, which should point to the
hosting structure (at the moment always an inode, but this may change).

> The new method, sync_page() is only passed a struct page.  So I cannot stack
> on it!  If I have to stack on it, I'll have to either
> 
> (1) complicate my stacking code considerably by stacking on pages.  This is
> impossible for my stackable compression file system, b/c the mapping of
> upper and lower pages is not 1-to-1.

Why can your sync_page implementation not grab the inode from mapping->host
and then call sync_page on the underlying fs' page(s) that hold the data?

> (2) change the kernel so that every instance of sync_page is passed the
> corresponding struct file.  This isn't pretty either.

I'd like to see the address_space methods /lose/ the struct file /
struct dentry pointer, but it may be that there are situations which require
it.

> Luckily, sync_page isn't used too much.  Only nfs seems to use it at the
> moment.  All other file systems which define ->sync_page use
> block_sync_page() which is defined as:
> 
> int block_sync_page(struct page *page)
> {
>   run_task_queue(&tq_disk);
>   return 0;
> }
> 
> This is confusing.  Why would block_sync_page ignore the page argument and
> call something else?  The name "block_sync_page" might be misleading.  The
> only thing I can think of is that block_sync_page is a placeholder for a
> time when it would actually do something with the page.

No, this looks OK to me. sync_page seems to be designed to ensure that any
async I/O on a page is complete. For normal "block-mapped" filesystems (which
is what the block_* helpers in buffer.c are for), pages with "in flight" I/O
will have requests on the appropriate block device's queue. Running tq_disk
should ensure these are completed. If tq_disk is ever split up (I believe Jens
Axboe is working on this), block_sync_page will need to look at
page->mapping->host to determine the device, I assume.

Non block-mapped filesystems (e.g. nfs) will need to do something different.

> Anyway, since sync_page appears to be an optional function, I've tried my
> stacking without defining my own ->sync_page.  Preliminary results show it
> seems to work.  However, if at any point I'd have to define ->sync_page page
> and have to call the lower file system's ->sync_page, I'd urge a change in
> the prototype of this method that would make it possible for me to stack
> this operation.
> 
> Also, I don't understand what's ->sync_page for in the first place.  The
> name of the fxn implies it might be something like a commit_write.

I presume it's been added to replace explicit tq_disk runs, which will allow
finer-grained syncing as described above, and also to ensure correctness in
nfs and other similar filesystems.



Re: new VFS method sync_page and stacking

2000-04-30 Thread Erez Zadok

In message <[EMAIL PROTECTED]>, "Roman V. Shaposhnick" writes:
> On Sun, Apr 30, 2000 at 03:28:18PM +0100, Steve Dodd wrote:
[...]
> > But an address_space is (or could be) a completely generic cache. It
> > might never be associated with an inode, let alone a dentry or file
> > structure.

[...]
>   Thus my opinion is that address_space_operations should remain 
> file-oriented ( and if there are no good contras take the first argument 
> of "struct file *" type ). At the same time it is possible to have completely
> different set of methods around  the same address_space stuff, but from my 
> point of view this story has nothing in common with how an *existing* 
> file-oriented interface should work.
>   
> Thanks, 
> Roman.

If you look at how various address_space ops are called, you'll see enough
evidence of an attempt to make this interface both a file-based interface
and a generic cache one (well, at least as far as I understood the code):

(1) generic_file_write (mm/filemap.c) can call ->commit_write with a normal
non-NULL file.
(2) block_symlink (fs/buffer.c) calls ->commit_write with NULL for the file
arg.

So perhaps to satisfy the various needs, all address_space ops should be
passed a struct file which may be NULL; the individual f/s will have to
check for it being NULL and deal with it.  (My stacking code already treats
commit_write this way.)

Erez.



Re: new VFS method sync_page and stacking

2000-04-30 Thread Roman V. Shaposhnick

On Sun, Apr 30, 2000 at 03:28:18PM +0100, Steve Dodd wrote:
> On Sun, Apr 30, 2000 at 01:44:50PM +0400, Roman V. Shaposhnick wrote:
> 
> >Did you see my letter about readpage ? Nevertheless, I think that first
> > argument of every function from address_space_operations should be 
> > "struct file *" and AFAIK this is 1) possible with the current kernel 2) will
> > simplify things a lot since it lets one to see the whole picture:
> > file->dentry->inode->pages, not the particular spot.
> 
> But an address_space is (or could be) a completely generic cache. It might
> never be associated with an inode, let alone a dentry or file structure.

  Ok, ok, hold on, it is filemap.c where all this stuff is defined, I guess
the name gives a hint that it is definitely associated with some kind 
of file machinery. But I understand what you mean. See my comments below.

> For example, I've got some experimental NTFS code which caches all metadata
> in the page cache using the address_space stuff. (This /mostly/ works really
> well, and makes the code a lot simpler. The only problem is
> block_read_full_page() and friends, which do:
> 
>   struct inode *inode = (struct inode*)page->mapping->host;
> 
> At the moment I have an evil hack in place -- I'm kludging up an inode
> structure and temporarily changing mapping->host before I call
> block_read_full_page. I'd really like to see this cleaned up, though I accept
> it may not happen before 2.5.)

  IMHO, you are going wrong direction here. My point is that sometimes  
we would like to see address_space stuff as generic cache but it is not.
Or being more precise it is a generic cache from the generic perspective,
but when you try to use generic functions from filemap.c you are bound to
treat this beast as a file cache. I mean you can use existing infrastructure
of data, but you should provide your own methods. I am sure, that if you do 
that kind of hacks you are simply using wrong interface. See for example 
how nfs arranges its dir-cache ( nfs/dir.c ).

  Thus my opinion is that address_space_operations should remain 
file-oriented ( and if there are no good contras take the first argument 
of "struct file *" type ). At the same time it is possible to have completely
different set of methods around  the same address_space stuff, but from my 
point of view this story has nothing in common with how an *existing* 
file-oriented interface should work.
  
Thanks, 
Roman.



Re: new VFS method sync_page and stacking

2000-04-30 Thread Erez Zadok

In message <[EMAIL PROTECTED]>, Steve Dodd writes:
> On Sun, Apr 30, 2000 at 01:44:50PM +0400, Roman V. Shaposhnick wrote:
> 
> >Did you see my letter about readpage ? Nevertheless, I think that first
> > argument of every function from address_space_operations should be 
> > "struct file *" and AFAIK this is 1) possible with the current kernel 2) will
> > simplify things a lot since it lets one to see the whole picture:
> > file->dentry->inode->pages, not the particular spot.
> 
> But an address_space is (or could be) a completely generic cache. It might
> never be associated with an inode, let alone a dentry or file structure.
> 
> For example, I've got some experimental NTFS code which caches all metadata
> in the page cache using the address_space stuff. (This /mostly/ works really
> well, and makes the code a lot simpler. The only problem is
> block_read_full_page() and friends, which do:
> 
>   struct inode *inode = (struct inode*)page->mapping->host;
> 
> At the moment I have an evil hack in place -- I'm kludging up an inode
> structure and temporarily changing mapping->host before I call
> block_read_full_page. I'd really like to see this cleaned up, though I accept
> it may not happen before 2.5.)

It sounds like different people have possibly conflicting needs.  I think
any major changes should wait for 2.5.  I would also suggest that such
significant VFS changes be discussed on this list so we can ensure that we
can all get what we need out of the VFS.  Thanks.

Erez.



Re: new VFS method sync_page and stacking

2000-04-30 Thread Steve Dodd

On Sun, Apr 30, 2000 at 01:44:50PM +0400, Roman V. Shaposhnick wrote:

>Did you see my letter about readpage ? Nevertheless, I think that first
> argument of every function from address_space_operations should be 
> "struct file *" and AFAIK this is 1) possible with the current kernel 2) will
> simplify things a lot since it lets one to see the whole picture:
> file->dentry->inode->pages, not the particular spot.

But an address_space is (or could be) a completely generic cache. It might
never be associated with an inode, let alone a dentry or file structure.

For example, I've got some experimental NTFS code which caches all metadata
in the page cache using the address_space stuff. (This /mostly/ works really
well, and makes the code a lot simpler. The only problem is
block_read_full_page() and friends, which do:

struct inode *inode = (struct inode*)page->mapping->host;

At the moment I have an evil hack in place -- I'm kludging up an inode
structure and temporarily changing mapping->host before I call
block_read_full_page. I'd really like to see this cleaned up, though I accept
it may not happen before 2.5.)



Re: new VFS method sync_page and stacking

2000-04-30 Thread Erez Zadok

In message <[EMAIL PROTECTED]>, "Roman V. Shaposhnick" writes:
> On Sun, Apr 30, 2000 at 04:46:37AM -0400, Erez Zadok wrote:

> > Background: my stacking code for linux is minimal.  I only stack on
> > things I absolutely have to.  By "stack on" I mean that I save a
> > link/pointer to a lower-level object in the private data field of an
> > upper-level object.  I do so for struct file, inode, dentry, etc.  But I
> > do NOT stack on pages.  Doing so would complicate stacking considerably.
> > So far I was able to avoid this b/c every function that deals with pages
> > also passes a struct file/dentry to it so I can find the correct lower
> > page.
> > 
> > The new method, sync_page() is only passed a struct page.  So I cannot
> > stack on it!  If I have to stack on it, I'll have to either
> 
>  If the inode will be enough for you then ( as it is implemented in
> nfs_sync_page ) you can do something like:
>struct inode*inode = (struct inode *)page->mapping->host;

Yes I can probably do that.  I can get the inode, from it I can get the
lower level inode since I stack on inodes.  Then I can call grab_cache_page
on the i_mapping of the lower inode and given this page's index.  I'll give
this idea a try.  Thanks.

> > (2) change the kernel so that every instance of sync_page is passed the
> > corresponding struct file.  This isn't pretty either.
> > 
>Did you see my letter about readpage ? Nevertheless, I think that first
> argument of every function from address_space_operations should be "struct
> file *" and AFAIK this is 1) possible with the current kernel 2) will
> simplify things a lot since it lets one to see the whole picture:
> file->dentry->inode->pages, not the particular spot.

Yes, I saw your post.  I agree.  I'm all for common-looking APIs.

> Roman.

Erez.



Re: new VFS method sync_page and stacking

2000-04-30 Thread Roman V. Shaposhnick

On Sun, Apr 30, 2000 at 04:46:37AM -0400, Erez Zadok wrote:
> Background: my stacking code for linux is minimal.  I only stack on things I
> absolutely have to.  By "stack on" I mean that I save a link/pointer to a
> lower-level object in the private data field of an upper-level object.  I do
> so for struct file, inode, dentry, etc.  But I do NOT stack on pages.  Doing
> so would complicate stacking considerably.  So far I was able to avoid this
> b/c every function that deals with pages also passes a struct file/dentry to
> it so I can find the correct lower page.
> 
> The new method, sync_page() is only passed a struct page.  So I cannot stack
> on it!  If I have to stack on it, I'll have to either

 If the inode will be enough for you then ( as it is implemented in nfs_sync_page )
you can do something like: 
   struct inode*inode = (struct inode *)page->mapping->host;

> (2) change the kernel so that every instance of sync_page is passed the
> corresponding struct file.  This isn't pretty either.
> 

   Did you see my letter about readpage? Nevertheless, I think that the first
argument of every function from address_space_operations should be 
"struct file *" and AFAIK this is 1) possible with the current kernel 2) will
simplify things a lot since it lets one see the whole picture:
file->dentry->inode->pages, not the particular spot.

Roman.