Re: [PATCH 0/2] writeback dirty inodes fixes

2007-08-10 Thread Fengguang Wu
On Sat, Aug 11, 2007 at 02:02:02PM +0800, Fengguang Wu wrote:
> Andrew,
> 
> Now the patches are simplified and rebased to 2.6.23-rc2-mm2.
> 
> The following two patches should be put immediately after
> writeback-fix-periodic-superblock-dirty-inode-flushing.patch:
> 
>  writeback: fix time ordering of the per superblock inode lists 8   
>  writeback: fix ntfs with sb_has_dirty_inodes() 

The following three patches should be updated to resolve merge conflicts:

sync_sb_inodes-propagate-errors.patch 
reiser4-sb_sync_inodes.patch
check_dirty_inode_list.patch (extended to check s_io/s_more_io)

They are attached in this mail.
From: Andrew Morton <[EMAIL PROTECTED]>

Guillaume points out that sync_sb_inodes() is failing to propagate error codes
back.  Fix that, and make several other void-returning functions not drop
reportable error codes.

Cc: Guillaume Chazarain <[EMAIL PROTECTED]>
Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
---

 fs/fs-writeback.c |   56 +++-
 include/linux/writeback.h |6 +--
 2 files changed, 45 insertions(+), 17 deletions(-)

--- linux-2.6.23-rc2-mm2.orig/fs/fs-writeback.c
+++ linux-2.6.23-rc2-mm2/fs/fs-writeback.c
@@ -392,13 +392,17 @@ __writeback_single_inode(struct inode *i
  * on the writer throttling path, and we get decent balancing between many
  * throttled threads: we don't want them all piling up on inode_sync_wait.
  */
-static void
+static int
 sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
 {
+	int ret = 0;
+
 	if (!wbc->for_kupdate || list_empty(&sb->s_io))
 		queue_io(sb, wbc->older_than_this);
 
 	while (!list_empty(&sb->s_io)) {
+		int err;
+
 		struct inode *inode = list_entry(sb->s_io.prev,
 		struct inode, i_list);
 		struct address_space *mapping = inode->i_mapping;
@@ -444,7 +448,9 @@ sync_sb_inodes(struct super_block *sb, s
 		BUG_ON(inode->i_state & I_FREEING);
 		__iget(inode);
 		pages_skipped = wbc->pages_skipped;
-		__writeback_single_inode(inode, wbc);
+		err = __writeback_single_inode(inode, wbc);
+		if (!ret)
+			ret = err;
 		if (wbc->sync_mode == WB_SYNC_HOLD) {
 			inode->dirtied_when = jiffies;
 			list_move(&inode->i_list, &sb->s_dirty);
@@ -469,7 +475,7 @@ sync_sb_inodes(struct super_block *sb, s
 	if (list_empty(&sb->s_io))
 		list_splice_init(&sb->s_more_io, &sb->s_io);
 
-	return;		/* Leave any unwritten inodes on s_io */
+	return ret;		/* Leave any unwritten inodes on s_io */
 }
 
 /*
@@ -491,10 +497,10 @@ sync_sb_inodes(struct super_block *sb, s
  * sync_sb_inodes will seekout the blockdev which matches `bdi'.  Maybe not
  * super-efficient but we're about to do a ton of I/O...
  */
-void
-writeback_inodes(struct writeback_control *wbc)
+int writeback_inodes(struct writeback_control *wbc)
 {
 	struct super_block *sb;
+	int ret = 0;
 
 	might_sleep();
 	spin_lock(&sb_lock);
@@ -512,9 +518,13 @@ restart:
 			 */
 			if (down_read_trylock(&sb->s_umount)) {
 if (sb->s_root) {
+	int err;
+
 	spin_lock(&inode_lock);
-	sync_sb_inodes(sb, wbc);
+	err = sync_sb_inodes(sb, wbc);
 	spin_unlock(&inode_lock);
+	if (!ret)
+		ret = err;
 }
 up_read(&sb->s_umount);
 			}
@@ -526,6 +536,7 @@ restart:
 			break;
 	}
 	spin_unlock(&sb_lock);
+	return ret;
 }
 
 /*
@@ -539,7 +550,7 @@ restart:
  * We add in the number of potentially dirty inodes, because each inode write
  * can dirty pagecache in the underlying blockdev.
  */
-void sync_inodes_sb(struct super_block *sb, int wait)
+int sync_inodes_sb(struct super_block *sb, int wait)
 {
 	struct writeback_control wbc = {
 		.sync_mode	= wait ? WB_SYNC_ALL : WB_SYNC_HOLD,
@@ -548,14 +559,16 @@ void sync_inodes_sb(struct super_block *
 	};
 	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
 	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
+	int ret;
 
 	wbc.nr_to_write = nr_dirty + nr_unstable +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused) +
 			nr_dirty + nr_unstable;
 	wbc.nr_to_write += wbc.nr_to_write / 2;		/* Bit more for luck */
 	spin_lock(&inode_lock);
-	sync_sb_inodes(sb, &wbc);
+	ret = sync_sb_inodes(sb, &wbc);
 	spin_unlock(&inode_lock);
+	return ret;
 }
 
 /*
@@ -591,13 +604,16 @@ static void set_sb_syncing(int val)
  * outstanding dirty inodes, the writeback goes block-at-a-time within the
  * filesystem's write_inode().  This is extremely slow.
  */
-static void __sync_inodes(int wait)
+static int __sync_inodes(int wait)
 {
 	struct super_block *sb;
+	int ret = 0;
 
 	spin_lock(&sb_lock);
 restart:
 	list_for_each_entry(sb, &super_blocks, s_list) {
+		int err;
+
 		if (sb->s_syncing)
 			continue;
 		sb->s_syncing = 1;
@@ -605,8 +621,12 @@ restart:
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
 		if (sb->s_root) {
-			sync_inodes_sb(sb, wait);
-			sync_blockdev(sb->s_bdev);
+			err = sync_inodes_sb(sb, wait);
+			if (!ret)
+ret = err;
+			err = sync_blockdev(sb->s_bdev);
+			if (!ret)
+ret = err;
 	

[PATCH 1/2] writeback: fix time ordering of the per superblock inode lists 8

2007-08-10 Thread Fengguang Wu
Fix the time ordering bug re-introduced by
writeback-fix-periodic-superblock-dirty-inode-flushing.patch.

It works by never moving not-yet-expired dirty inodes from s_dirty to s_io,
*only to* move them back. The move-inodes-back-and-forth thing is a mess.

Cc: Ken Chen <[EMAIL PROTECTED]>
Cc: Andrew Morton <[EMAIL PROTECTED]>
Signed-off-by: Fengguang Wu <[EMAIL PROTECTED]>
---
 fs/fs-writeback.c |   40 ++--
 1 file changed, 22 insertions(+), 18 deletions(-)

--- linux-2.6.23-rc2-mm2.orig/fs/fs-writeback.c
+++ linux-2.6.23-rc2-mm2/fs/fs-writeback.c
@@ -172,6 +172,23 @@ static void requeue_io(struct inode *ino
 }
 
 /*
+ * Queue expired dirty inodes for io.
+ */
+static void queue_io(struct super_block *sb,
+   unsigned long *older_than_this)
+{
+   while (!list_empty(&sb->s_dirty)) {
+   struct inode *inode = list_entry(sb->s_dirty.prev,
+   struct inode, i_list);
+   /* Was this inode dirtied too recently? */
+   if (older_than_this &&
+   time_after(inode->dirtied_when, *older_than_this))
+   break;
+   list_move(&inode->i_list, &sb->s_io);
+   }
+}
+
+/*
  * Write a single inode's dirty pages and inode data out to disk.
  * If `wait' is set, wait on the writeout.
  *
@@ -295,10 +312,10 @@ __writeback_single_inode(struct inode *i
 
/*
 * We're skipping this inode because it's locked, and we're not
-* doing writeback-for-data-integrity.  Move it to the head of
-* s_dirty so that writeback can proceed with the other inodes
-* on s_io.  We'll have another go at writing back this inode
-* when the s_dirty iodes get moved back onto s_io.
+* doing writeback-for-data-integrity.  Move it to s_more_io so
+* that writeback can proceed with the other inodes on s_io.
+* We'll have another go at writing back this inode when we
+* completed a full scan of s_io.
 */
requeue_io(inode);
 
@@ -362,10 +379,8 @@ __writeback_single_inode(struct inode *i
 static void
 sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
 {
-   const unsigned long start = jiffies;/* livelock avoidance */
-
if (!wbc->for_kupdate || list_empty(&sb->s_io))
-   list_splice_init(&sb->s_dirty, &sb->s_io);
+   queue_io(sb, wbc->older_than_this);
 
while (!list_empty(&sb->s_io)) {
struct inode *inode = list_entry(sb->s_io.prev,
@@ -406,17 +421,6 @@ sync_sb_inodes(struct super_block *sb, s
continue;   /* blockdev has wrong queue */
}
 
-   /* Was this inode dirtied after sync_sb_inodes was called? */
-   if (time_after(inode->dirtied_when, start))
-   break;
-
-   /* Was this inode dirtied too recently? */
-   if (wbc->older_than_this && time_after(inode->dirtied_when,
-   *wbc->older_than_this)) {
-   list_splice_init(&sb->s_io, sb->s_dirty.prev);
-   break;
-   }
-
/* Is another pdflush already flushing this queue? */
if (current_is_pdflush() && !writeback_acquire(bdi))
break;

-- 
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] writeback: fix ntfs with sb_has_dirty_inodes()

2007-08-10 Thread Fengguang Wu
NTFS's if-condition on dirty inodes is not complete.
Fix it with sb_has_dirty_inodes().

Cc: Anton Altaparmakov <[EMAIL PROTECTED]>
Cc: Ken Chen <[EMAIL PROTECTED]>
Cc: Andrew Morton <[EMAIL PROTECTED]>
Signed-off-by: Fengguang Wu <[EMAIL PROTECTED]>
---
---
 fs/fs-writeback.c  |9 -
 fs/ntfs/super.c|4 ++--
 include/linux/fs.h |1 +
 3 files changed, 11 insertions(+), 3 deletions(-)

--- linux-2.6.23-rc2-mm2.orig/fs/ntfs/super.c
+++ linux-2.6.23-rc2-mm2/fs/ntfs/super.c
@@ -2381,14 +2381,14 @@ static void ntfs_put_super(struct super_
 */
ntfs_commit_inode(vol->mft_ino);
write_inode_now(vol->mft_ino, 1);
-   if (!list_empty(&sb->s_dirty)) {
+   if (sb_has_dirty_inodes(sb)) {
const char *s1, *s2;
 
mutex_lock(&vol->mft_ino->i_mutex);
truncate_inode_pages(vol->mft_ino->i_mapping, 0);
mutex_unlock(&vol->mft_ino->i_mutex);
write_inode_now(vol->mft_ino, 1);
-   if (!list_empty(&sb->s_dirty)) {
+   if (sb_has_dirty_inodes(sb)) {
static const char *_s1 = "inodes";
static const char *_s2 = "";
s1 = _s1;
--- linux-2.6.23-rc2-mm2.orig/include/linux/fs.h
+++ linux-2.6.23-rc2-mm2/include/linux/fs.h
@@ -1712,6 +1712,7 @@ extern int bdev_read_only(struct block_d
 extern int set_blocksize(struct block_device *, int);
 extern int sb_set_blocksize(struct super_block *, int);
 extern int sb_min_blocksize(struct super_block *, int);
+extern int sb_has_dirty_inodes(struct super_block *);
 
 extern int generic_file_mmap(struct file *, struct vm_area_struct *);
 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
--- linux-2.6.23-rc2-mm2.orig/fs/fs-writeback.c
+++ linux-2.6.23-rc2-mm2/fs/fs-writeback.c
@@ -188,6 +188,13 @@ static void queue_io(struct super_block 
}
 }
 
+int sb_has_dirty_inodes(struct super_block *sb)
+{
+   return !list_empty(&sb->s_dirty) ||
+  !list_empty(&sb->s_io);
+}
+EXPORT_SYMBOL(sb_has_dirty_inodes);
+
 /*
  * Write a single inode's dirty pages and inode data out to disk.
  * If `wait' is set, wait on the writeout.
@@ -485,7 +492,7 @@ writeback_inodes(struct writeback_contro
 restart:
sb = sb_entry(super_blocks.prev);
for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
-   if (!list_empty(&sb->s_dirty) || !list_empty(&sb->s_io)) {
+   if (sb_has_dirty_inodes(sb)) {
/* we're making our own get_super here */
sb->s_count++;
spin_unlock(&sb_lock);

-- 
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/2] writeback dirty inodes fixes

2007-08-10 Thread Fengguang Wu
Andrew,

Now the patches are simplified and rebased to 2.6.23-rc2-mm2.

The following two patches should be put immediately after
writeback-fix-periodic-superblock-dirty-inode-flushing.patch:

 writeback: fix time ordering of the per superblock inode lists 8   
 writeback: fix ntfs with sb_has_dirty_inodes() 

Thank you,
Fengguang
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/25] move handling of setuid/gid bits from VFS into individual setattr functions (RESEND)

2007-08-10 Thread Christoph Hellwig
On Fri, Aug 10, 2007 at 04:47:52PM -0400, Jeff Layton wrote:
> attr->ia_valid after the setattr operation returns. If either ATTR_KILL_*
> bit is set then BUG(). The helper function already clears those bits
> so anything using it should automatically be ok. We'd have to fix
> up NFS and a few others that don't implement suid/sgid.
> 
> This is not as certain as changing the name of the inode operation. It
> would only pop when someone is attempting to change a setuid/setgid
> file on these filesystems. Still, it should conceivably catch most if
> not all offenders. Would that be sufficient to take care of everyone's
> concerns?

I like the idea of checking ia_valid after return a lot.  But instead of
going BUG() it should just do the default action, so that we can avoid
touching all the filesystems and only need to change those that need
special care.  I also have plans to add some new AT_ flags for implementing
some filesystem ioctls in generic code that would benefit greatly from
the ia_valid checking after return to return ENOTTY for filesystems not
implementing those ioctls.
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/16] Permit filesystem local caching [try #3]

2007-08-10 Thread Casey Schaufler

--- David Howells <[EMAIL PROTECTED]> wrote:

> These patches add local caching for network filesystems such as NFS and AFS.
> 
> FS-Cache now runs fully asynchronously as required by Trond Myklebust for
> NFS.
> 
> --
> Changes:
> [try #3]:
> 
>  (*) Added missing file to CacheFiles patch.
> 
>  (*) Made new security functions return errors and pass actual return data
> via
>  argument pointer.
> 
>  (*) Cleaned up NFS patch.
> 
>  (*) The 'fsc' flag must now be passed to NFS mount by the string options.
> 
>  (*) Split the NFS patch into three as requested by Trond.
> 
> [try #2]:
> 
>  (*) The CacheFiles module no longer accepts directory fds in its cull and
>  inuse commands from cachefilesd.  Instead it uses the current working
>  directory of the calling process as the basis for looking up the object.
>  Corollary to this, fget_light() no longer needs to be exported.
> 

How would you expect an LSM that is not SELinux to interface with
CacheFiles? You have gone to a great deal of effort to support the
requirements of an SELinux system, and that's good, but you have
extended the LSM interface to expose SELinux data structures (secids)
and require them for the operation of CacheFiles, and that's bad.
The data used within an LSM is private to the LSM, and this applies
to SELinux as well as to any other LSM that may come along, such
as the Smack LSM I'm working on. This applies to task data as well
as file data. Further, the behavior of the system in the presence
of an LSM should be controlled by the LSM, it is more than a little
scary that CacheFiles is enforcing SELinux policy based on secids
that may be coming from a different LSM.

I applaud the integration of CacheFiles with SELinux. Unfortunately,
you've done so using the LSM interface in such a way that an LSM
other than SELinux is likely to demonstrate inappropriate behaviors
in the presence of CacheFiles because you have so carefully integrated
the SELinux requirements.

If the integration with SELinux is important to you, and I would
expect that it is given the work you've put into it, I suggest that
the SELinux specific behaviors be identified so that another LSM
can provide the behavior appropriate to the policy it chooses to
enforce and put that into SELinux with an LSM interface. I know
that you're looking at a significant effort to do that, but I
wouldn't think that you'd want CacheFiles to behave badly in the
presence of an LSM that doesn't happen to be SELinux.

I also know it's tempting to point out that SELinux is the only
upstream LSM. I hope to change that before too long, and I know
there are others with ambitions as well. I would not like to see
CacheFiles have to get excluded in the presence of other LSMs
and I doubt you would either.


Casey Schaufler
[EMAIL PROTECTED]
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 00/25] move handling of setuid/gid bits from VFS into individual setattr functions (RESEND)

2007-08-10 Thread Jeff Layton
On Tue, 07 Aug 2007 20:45:34 -0400
Trond Myklebust <[EMAIL PROTECTED]> wrote:
> > - rename something so that unconverted filesystems will reliably fail to
> >   compile?
> > 
> > - leave existing filesystems alone, but add a new
> >   inode_operations.setattr_jeff, which the networked filesystems can
> >   implement, and teach core vfs to call setattr_jeff in preference to
> >   setattr?
> 
> If you really need to know that the filesystem is handling the flags,
> then how about instead having ->setattr() return something which
> indicates which flags it actually handled? That is likely to be a far
> more intrusive change, but it is one which is future-proof.
> 

One thing that we could do here is have notify_change check
attr->ia_valid after the setattr operation returns. If either ATTR_KILL_*
bit is set then BUG(). The helper function already clears those bits
so anything using it should automatically be ok. We'd have to fix
up NFS and a few others that don't implement suid/sgid.

This is not as certain as changing the name of the inode operation. It
would only pop when someone is attempting to change a setuid/setgid
file on these filesystems. Still, it should conceivably catch most if
not all offenders. Would that be sufficient to take care of everyone's
concerns?

-- 
Jeff Layton <[EMAIL PROTECTED]>
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/4] writeback: 3-queue based writeback schedule

2007-08-10 Thread Fengguang Wu
On Sat, Aug 11, 2007 at 12:47:15AM +0800, Fengguang Wu wrote:
> In fact s_more_io is no longer necessary. We end up with a priority

Ah sorry, s_more_io is still needed to keep the time-ordering. I was
thinking of schedule fairness, in which sense only one cyclic list
will be sufficient.

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 09/16] CacheFiles: Permit a process's create SID to be overridden [try #3]

2007-08-10 Thread Casey Schaufler

--- David Howells <[EMAIL PROTECTED]> wrote:

> Make it possible for a process's file creation SID to be temporarily
> overridden
> by CacheFiles so that files created in the cache have the right label
> attached.

I still object to the use of sids in LSM interfaces. I still owe you
a viable alternative.


Casey Schaufler
[EMAIL PROTECTED]
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/4] writeback: 3-queue based writeback schedule

2007-08-10 Thread Fengguang Wu
On Fri, Aug 10, 2007 at 02:34:14PM +0800, Fengguang Wu wrote:
> Properly manage the 3 queues of sb->s_dirty/s_io/s_more_io so that
>   - time-ordering of dirtied_when can be easily maintained
>   - writeback can continue from where the previous run left off
> 
> The majority work has been done by Andrew Morton and Ken Chen,
> this patch just clarifies the roles of the 3 queues:
> - s_dirty   for io delay(up to dirty_expire_interval)
> - s_io  for io run(a full scan of s_io may involve multiple runs)
> - s_more_io for io continuation
> 
> The following paradigm shows the data flow.
> 
> requeue on new scan(empty s_io)
> +-+
> | |
>  dirty   old| |
>  inodes  enough V |
>  ==> s_dirty ==> s_io |
>  ^| requeue io|
>  |+-> s_more_io
>  |   hold back|
>  ++--> disk write requests
> 
> sb->s_dirty: a FIFO queue
> - s_dirty hosts not-yet-expired(recently dirtied) dirty inodes
> - once expired, inodes will be moved out of s_dirty and *never put back*
>   (unless for some reason we have to hold on the inode for some time)
> 
> sb->s_io and sb->s_more_io: a cyclic queue scanned for io
> - on each run of generic_sync_sb_inodes(), some more s_dirty inodes may be
>   appended to s_io
> - on each full scan of s_io, all s_more_io inodes will be moved back to s_io
> - large files that cannot be synced in one run will be moved to s_more_io for
>   retry on next full scan

In fact s_more_io is no longer necessary. We end up with a priority
io-delaying queue s_dirty and a cyclic io-syncing queue s_io. They are
properly decoupled.  A more flexible data structure can be used for
s_dirty, if we want to redirty an inode with arbitrary delays. Also
more priority queues can be introduced in addition to s_dirty. For
example, we can designate a queue s_dirty_atime for inodes dirtied
only by `atime', and sync them lazily.

> inode->dirtied_when
> - inode->dirtied_when is updated to the *current* jiffies on pushing into
>   s_dirty, and is never changed in other cases.
> - time-ordering thus can be simply ensured while moving inodes between lists,
>   since (time order == enqueue order)
> 
> Cc: Ken Chen <[EMAIL PROTECTED]>
> Cc: Andrew Morton <[EMAIL PROTECTED]>
> Signed-off-by: Fengguang Wu <[EMAIL PROTECTED]>
> ---
>  fs/fs-writeback.c |  106 +---
>  1 file changed, 52 insertions(+), 54 deletions(-)

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 14/14] NFS: Use local caching [try #2]

2007-08-10 Thread Trond Myklebust
On Fri, 2007-08-10 at 15:04 +0100, David Howells wrote:
> Trond Myklebust <[EMAIL PROTECTED]> wrote:
> 
> > > > Dang, that's a lot of inlines... AFAICS, approx half of fs/nfs/fscache.h
> > > > should really be moved into fscache.c.
> > > 
> > > If you wish.  It seems a shame since a lot of them have only one caller.
> > 
> > ...however it also forces you to export a lot of stuff which is really
> > private to fscache.c (the atomics etc).
> 
> The atomics is actually a bad example.  These are referred to directly by part
> of the table in fs/nfs/sysctl.c.  Is there a better way of exporting
> statistics than through /proc/sys/ files?

/proc/self/mountstats

Trond

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 16/16] NFS: Display local caching state [try #3]

2007-08-10 Thread David Howells
Display the local caching state in /proc/fs/nfsfs/volumes.

Signed-off-by: David Howells <[EMAIL PROTECTED]>
---

 fs/nfs/client.c  |7 ---
 fs/nfs/fscache.h |   12 
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0de4db4..d350668 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1319,7 +1319,7 @@ static int nfs_volume_list_show(struct seq_file *m, void 
*v)
 
/* display header on line 1 */
if (v == &nfs_volume_list) {
-   seq_puts(m, "NV SERVER   PORT DEV FSID\n");
+   seq_puts(m, "NV SERVER   PORT DEV FSID  FSC\n");
return 0;
}
/* display one transport per line on subsequent lines */
@@ -1333,12 +1333,13 @@ static int nfs_volume_list_show(struct seq_file *m, 
void *v)
 (unsigned long long) server->fsid.major,
 (unsigned long long) server->fsid.minor);
 
-   seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n",
+   seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s %s\n",
   clp->cl_nfsversion,
   NIPQUAD(clp->cl_addr.sin_addr),
   ntohs(clp->cl_addr.sin_port),
   dev,
-  fsid);
+  fsid,
+  nfs_server_fscache_state(server));
 
return 0;
 }
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index 44bb0d1..77f3450 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -56,6 +56,17 @@ extern void __nfs_fscache_invalidate_page(struct page *, 
struct inode *);
 extern int nfs_fscache_release_page(struct page *, gfp_t);
 
 /*
+ * indicate the client caching state as readable text
+ */
+static inline const char *nfs_server_fscache_state(struct nfs_server *server)
+{
+   if (server->nfs_client->fscache &&
+   (server->options & NFS_OPTION_FSCACHE))
+   return "yes";
+   return "no ";
+}
+
+/*
  * release the caching state associated with a page if undergoing complete page
  * invalidation
  */
@@ -110,6 +121,7 @@ static inline void nfs_fscache_unregister(void) {}
 static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {}
 static inline void nfs4_fscache_get_client_cookie(struct nfs_client *clp) {}
 static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
+static inline const char *nfs_server_fscache_state(struct nfs_server *server) 
{ return "no "; }
 
 static inline void nfs_fscache_init_fh_cookie(struct inode *inode) {}
 static inline void nfs_fscache_enable_fh_cookie(struct inode *inode) {}

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 14/16] NFS: Use local caching [try #3]

2007-08-10 Thread David Howells
The attached patch makes it possible for the NFS filesystem to make use of the
network filesystem local caching service (FS-Cache).

To be able to use this, an updated mount program is required.  This can be
obtained from:

http://people.redhat.com/steved/fscache/util-linux/

To mount an NFS filesystem to use caching, add an "fsc" option to the mount:

mount warthog:/ /a -o fsc

Signed-Off-By: David Howells <[EMAIL PROTECTED]>
---

 fs/nfs/Makefile   |1 
 fs/nfs/client.c   |5 +
 fs/nfs/file.c |   51 ++
 fs/nfs/fscache-def.c  |  288 +++
 fs/nfs/fscache.c  |  374 +
 fs/nfs/fscache.h  |  144 +
 fs/nfs/inode.c|   48 +-
 fs/nfs/read.c |   28 +++
 fs/nfs/sysctl.c   |   44 +
 include/linux/nfs_fs.h|8 +
 include/linux/nfs_fs_sb.h |7 +
 11 files changed, 988 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index b55cb23..07c9345 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -16,4 +16,5 @@ nfs-$(CONFIG_NFS_V4)  += nfs4proc.o nfs4xdr.o nfs4state.o 
nfs4renewd.o \
   nfs4namespace.o
 nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
 nfs-$(CONFIG_SYSCTL) += sysctl.o
+nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-def.o
 nfs-objs   := $(nfs-y)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a49f9fe..f1783b2 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -41,6 +41,7 @@
 #include "delegation.h"
 #include "iostat.h"
 #include "internal.h"
+#include "fscache.h"
 
 #define NFSDBG_FACILITYNFSDBG_CLIENT
 
@@ -137,6 +138,8 @@ static struct nfs_client *nfs_alloc_client(const char 
*hostname,
clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
 #endif
 
+   nfs_fscache_get_client_cookie(clp);
+
return clp;
 
 error_3:
@@ -168,6 +171,8 @@ static void nfs_free_client(struct nfs_client *clp)
 
nfs4_shutdown_client(clp);
 
+   nfs_fscache_release_client_cookie(clp);
+
/* -EIO all pending I/O */
if (!IS_ERR(clp->cl_rpcclient))
rpc_shutdown_client(clp->cl_rpcclient);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index c87dc71..d4d9c06 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -34,6 +34,8 @@
 
 #include "delegation.h"
 #include "iostat.h"
+#include "internal.h"
+#include "fscache.h"
 
 #define NFSDBG_FACILITYNFSDBG_FILE
 
@@ -54,6 +56,12 @@ static int nfs_check_flags(int flags);
 static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
 static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
 static int nfs_setlease(struct file *file, long arg, struct file_lock **fl);
+static int nfs_file_page_mkwrite(struct vm_area_struct *vma, struct page 
*page);
+
+struct vm_operations_struct nfs_fs_vm_operations = {
+   .fault  = filemap_fault,
+   .page_mkwrite   = nfs_file_page_mkwrite,
+};
 
 const struct file_operations nfs_file_operations = {
.llseek = nfs_file_llseek,
@@ -259,6 +267,9 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * 
vma)
status = nfs_revalidate_mapping(inode, file->f_mapping);
if (!status)
status = generic_file_mmap(file, vma);
+   if (!status)
+   vma->vm_ops = &nfs_fs_vm_operations;
+
return status;
 }
 
@@ -311,22 +322,48 @@ static int nfs_commit_write(struct file *file, struct 
page *page, unsigned offse
return status;
 }
 
+/*
+ * Partially or wholly invalidate a page
+ * - Release the private state associated with a page if undergoing complete
+ *   page invalidation
+ * - Called if either PG_private or PG_fscache set on the page
+ * - Caller holds page lock
+ */
 static void nfs_invalidate_page(struct page *page, unsigned long offset)
 {
if (offset != 0)
return;
/* Cancel any unstarted writes on this page */
nfs_wb_page_priority(page->mapping->host, page, FLUSH_INVALIDATE);
+
+   nfs_fscache_invalidate_page(page, page->mapping->host);
 }
 
+/*
+ * Release the private state associated with a page
+ * - Called if either PG_private or PG_fscache set on the page
+ * - Caller holds page lock
+ * - Return true (may release) or false (may not)
+ */
 static int nfs_release_page(struct page *page, gfp_t gfp)
 {
/* If PagePrivate() is set, then the page is not freeable */
-   return 0;
+   if (PagePrivate(page))
+   return 0;
+   return nfs_fscache_release_page(page, gfp);
 }
 
+/*
+ * Attempt to clear the private state associated with a page when an error
+ * occurs that requires the cached contents of an inode to be written back or
+ * destroyed
+ * - Called if either PG_private or PG_fscache set on the page
+ * - Caller holds page lock
+ * - Return 0 if successful, -error otherwise
+ */
 static int nfs_launder_page(struct page *page

[PATCH 15/16] NFS: Configuration and mount option changes to enable local caching on NFS [try #3]

2007-08-10 Thread David Howells
Changes to the kernel configuration definitions and to the NFS mount options to
allow the local caching support added by the previous patch to be enabled.

Signed-off-by: David Howells <[EMAIL PROTECTED]>
---

 fs/Kconfig|8 
 fs/nfs/client.c   |   14 ++
 fs/nfs/internal.h |2 ++
 fs/nfs/super.c|   40 ++--
 4 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index 7feb4cb..76d5d16 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1600,6 +1600,14 @@ config NFS_V4
 
  If unsure, say N.
 
+config NFS_FSCACHE
+   bool "Provide NFS client caching support (EXPERIMENTAL)"
+   depends on EXPERIMENTAL
+   depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y
+   help
+ Say Y here if you want NFS data to be cached locally on disc through
+ the general filesystem cache manager
+
 config NFS_DIRECTIO
bool "Allow direct I/O on NFS files"
depends on NFS_FS
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f1783b2..0de4db4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -543,7 +543,8 @@ error:
 /*
  * Create a version 2 or 3 client
  */
-static int nfs_init_server(struct nfs_server *server, const struct 
nfs_mount_data *data)
+static int nfs_init_server(struct nfs_server *server, const struct 
nfs_mount_data *data,
+  unsigned int extra_options)
 {
struct nfs_client *clp;
int error, nfsvers = 2;
@@ -580,6 +581,7 @@ static int nfs_init_server(struct nfs_server *server, const 
struct nfs_mount_dat
server->acregmax = data->acregmax * HZ;
server->acdirmin = data->acdirmin * HZ;
server->acdirmax = data->acdirmax * HZ;
+   server->options = extra_options;
 
/* Start lockd here, before we might error out */
error = nfs_start_lockd(server);
@@ -776,6 +778,7 @@ void nfs_free_server(struct nfs_server *server)
  * - keyed on server and FSID
  */
 struct nfs_server *nfs_create_server(const struct nfs_mount_data *data,
+unsigned extra_options,
 struct nfs_fh *mntfh)
 {
struct nfs_server *server;
@@ -787,7 +790,7 @@ struct nfs_server *nfs_create_server(const struct 
nfs_mount_data *data,
return ERR_PTR(-ENOMEM);
 
/* Get a client representation */
-   error = nfs_init_server(server, data);
+   error = nfs_init_server(server, data, extra_options);
if (error < 0)
goto error;
 
@@ -911,7 +914,8 @@ error:
  * Create a version 4 volume record
  */
 static int nfs4_init_server(struct nfs_server *server,
-   const struct nfs4_mount_data *data, rpc_authflavor_t 
authflavour)
+   const struct nfs4_mount_data *data, rpc_authflavor_t 
authflavour,
+   unsigned int extra_options)
 {
int error;
 
@@ -930,6 +934,7 @@ static int nfs4_init_server(struct nfs_server *server,
server->acregmax = data->acregmax * HZ;
server->acdirmin = data->acdirmin * HZ;
server->acdirmax = data->acdirmax * HZ;
+   server->options = extra_options;
 
error = nfs_init_server_rpcclient(server, authflavour);
 
@@ -948,6 +953,7 @@ struct nfs_server *nfs4_create_server(const struct 
nfs4_mount_data *data,
  const char *mntpath,
  const char *ip_addr,
  rpc_authflavor_t authflavour,
+ unsigned int extra_options,
  struct nfs_fh *mntfh)
 {
struct nfs_fattr fattr;
@@ -967,7 +973,7 @@ struct nfs_server *nfs4_create_server(const struct 
nfs4_mount_data *data,
goto error;
 
/* set up the general RPC client */
-   error = nfs4_init_server(server, data, authflavour);
+   error = nfs4_init_server(server, data, authflavour, extra_options);
if (error < 0)
goto error;
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 76cf55d..34ef000 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -33,6 +33,7 @@ extern struct rpc_program nfs_program;
 extern void nfs_put_client(struct nfs_client *);
 extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
 extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *,
+   unsigned int,
struct nfs_fh *);
 extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *,
 const char *,
@@ -40,6 +41,7 @@ extern struct nfs_server *nfs4_create_server(const struct 
nfs4_mount_data *,
 const char *,
 const char *,
 rpc_authflavor_t,
+  

[PATCH 08/16] CacheFiles: Export things for CacheFiles [try #3]

2007-08-10 Thread David Howells
Export a number of functions for CacheFiles's use.

Signed-Off-By: David Howells <[EMAIL PROTECTED]>
---

 fs/super.c   |2 ++
 kernel/auditsc.c |2 ++
 2 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index fc8ebed..c0d99dd 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -270,6 +270,8 @@ int fsync_super(struct super_block *sb)
return sync_blockdev(sb->s_bdev);
 }
 
+EXPORT_SYMBOL_GPL(fsync_super);
+
 /**
  * generic_shutdown_super  -   common helper for ->kill_sb()
  * @sb: superblock to kill
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3401293..0112179 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1526,6 +1526,8 @@ add_names:
}
 }
 
+EXPORT_SYMBOL_GPL(__audit_inode_child);
+
 /**
  * auditsc_get_stamp - get local copies of audit_context values
  * @ctx: audit_context for the task

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/16] CacheFiles: Permit an inode's security ID to be obtained [try #3]

2007-08-10 Thread David Howells
Permit an inode's security ID to be obtained by the CacheFiles module.  This is
then used as the SID with which files and directories will be created in the
cache.

Signed-Off-By: David Howells <[EMAIL PROTECTED]>
---

 include/linux/security.h |   19 +++
 security/dummy.c |7 +++
 security/selinux/hooks.c |9 +
 3 files changed, 35 insertions(+), 0 deletions(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index 194ef49..a54958a 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -414,6 +414,11 @@ struct request_sock;
  * the size of the buffer required.
  * Returns number of bytes used/required on success.
  *
+ * @inode_get_secid:
+ * Retrieve the security ID from an inode.
+ * @inode refers to the inode to get the security ID from.
+ * @secid points to the location in which to return the security ID.
+ *
  * Security hooks for file operations
  *
  * @file_permission:
@@ -1256,6 +1261,7 @@ struct security_operations {
int (*inode_getsecurity)(const struct inode *inode, const char *name, 
void *buffer, size_t size, int err);
int (*inode_setsecurity)(struct inode *inode, const char *name, const 
void *value, size_t size, int flags);
int (*inode_listsecurity)(struct inode *inode, char *buffer, size_t 
buffer_size);
+   int (*inode_get_secid)(struct inode *inode, u32 *secid);
 
int (*file_permission) (struct file * file, int mask);
int (*file_alloc_security) (struct file * file);
@@ -1818,6 +1824,13 @@ static inline int security_inode_listsecurity(struct 
inode *inode, char *buffer,
return security_ops->inode_listsecurity(inode, buffer, buffer_size);
 }
 
+static inline int security_inode_get_secid(struct inode *inode, u32 *secid)
+{
+   if (unlikely(IS_PRIVATE(inode)))
+   return 0;
+   return security_ops->inode_get_secid(inode, secid);
+}
+
 static inline int security_file_permission (struct file *file, int mask)
 {
return security_ops->file_permission (file, mask);
@@ -2518,6 +2531,12 @@ static inline int security_inode_listsecurity(struct 
inode *inode, char *buffer,
return 0;
 }
 
+static inline int security_inode_get_secid(struct inode *inode, u32 *secid)
+{
+   *secid = 0;
+   return 0;
+}
+
 static inline int security_file_permission (struct file *file, int mask)
 {
return 0;
diff --git a/security/dummy.c b/security/dummy.c
index 1a0be85..6be18fe 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -392,6 +392,12 @@ static int dummy_inode_listsecurity(struct inode *inode, 
char *buffer, size_t bu
return 0;
 }
 
+static int dummy_inode_get_secid(struct inode *inode, u32 *secid)
+{
+   *secid = 0;
+   return 0;
+}
+
 static const char *dummy_inode_xattr_getsuffix(void)
 {
return NULL;
@@ -1046,6 +1052,7 @@ void security_fixup_ops (struct security_operations *ops)
set_to_dummy_if_null(ops, inode_getsecurity);
set_to_dummy_if_null(ops, inode_setsecurity);
set_to_dummy_if_null(ops, inode_listsecurity);
+   set_to_dummy_if_null(ops, inode_get_secid);
set_to_dummy_if_null(ops, file_permission);
set_to_dummy_if_null(ops, file_alloc_security);
set_to_dummy_if_null(ops, file_free_security);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 2c64ec8..34646f8 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2464,6 +2464,14 @@ static int selinux_inode_listsecurity(struct inode 
*inode, char *buffer, size_t
return len;
 }
 
+static int selinux_inode_get_secid(struct inode *inode, u32 *secid)
+{
+   struct inode_security_struct *isec = inode->i_security;
+
+   *secid = isec->sid;
+   return 0;
+}
+
 /* file security operations */
 
 static int selinux_file_permission(struct file *file, int mask)
@@ -4826,6 +4834,7 @@ static struct security_operations selinux_ops = {
.inode_getsecurity =selinux_inode_getsecurity,
.inode_setsecurity =selinux_inode_setsecurity,
.inode_listsecurity =   selinux_inode_listsecurity,
+   .inode_get_secid =  selinux_inode_get_secid,
 
.file_permission =  selinux_file_permission,
.file_alloc_security =  selinux_file_alloc_security,

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/16] CacheFiles: Get the SID under which the CacheFiles module should operate [try #3]

2007-08-10 Thread David Howells
Get the SID under which the CacheFiles module should operate so that the
SELinux security system can control the accesses it makes.

Signed-Off-By: David Howells <[EMAIL PROTECTED]>
---

 include/linux/security.h |   20 
 security/dummy.c |7 +++
 security/selinux/hooks.c |7 +++
 3 files changed, 34 insertions(+), 0 deletions(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index a54958a..593a4d0 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1173,6 +1173,14 @@ struct request_sock;
  *  previously acting.
  * @oldsecid points the location in which to return the displaced security 
ID.
  *
+ * @cachefiles_get_secid:
+ * Determine the security ID for the CacheFiles module to use when
+ * accessing the filesystem containing the cache.
+ * @secid contains the security ID under which cachefiles daemon is
+ *  running.
+ * @modsecid contains the pointer to where the security ID for the module
+ * is to be stored.
+ *
  * This is the main security structure.
  */
 struct security_operations {
@@ -1361,6 +1369,7 @@ struct security_operations {
int (*set_fscreate_secid)(u32 secid, u32 *oldsecid);
int (*act_as_secid)(u32 secid, u32 *oldsecid);
int (*act_as_self)(u32 *oldsecid);
+   int (*cachefiles_get_secid)(u32 secid, u32 *modsecid);
 
 #ifdef CONFIG_SECURITY_NETWORK
int (*unix_stream_connect) (struct socket * sock,
@@ -2185,6 +2194,11 @@ static inline int security_act_as_self(u32 *oldsecid)
return security_ops->act_as_self(oldsecid);
 }
 
+static inline int security_cachefiles_get_secid(u32 secid, u32 *modsecid)
+{
+   return security_ops->cachefiles_get_secid(secid, modsecid);
+}
+
 /* prototypes */
 extern int security_init   (void);
 extern int register_security   (struct security_operations *ops);
@@ -2897,6 +2911,12 @@ static inline u32 security_act_as_self(u32 *oldsecid)
return 0;
 }
 
+static inline int security_cachefiles_get_secid(u32 secid, u32 *modsecid)
+{
+   *modsecid = 0;
+   return 0;
+}
+
 #endif /* CONFIG_SECURITY */
 
 #ifdef CONFIG_SECURITY_NETWORK
diff --git a/security/dummy.c b/security/dummy.c
index 6be18fe..6e79dd4 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -960,6 +960,12 @@ static int dummy_act_as_self(u32 *oldsecid)
return 0;
 }
 
+static int dummy_cachefiles_get_secid(u32 secid, u32 *modsecid)
+{
+   *modsecid = 0;
+   return 0;
+}
+
 #ifdef CONFIG_KEYS
 static inline int dummy_key_alloc(struct key *key, struct task_struct *ctx,
  unsigned long flags)
@@ -1119,6 +1125,7 @@ void security_fixup_ops (struct security_operations *ops)
set_to_dummy_if_null(ops, set_fscreate_secid);
set_to_dummy_if_null(ops, act_as_secid);
set_to_dummy_if_null(ops, act_as_self);
+   set_to_dummy_if_null(ops, cachefiles_get_secid);
 #ifdef CONFIG_SECURITY_NETWORK
set_to_dummy_if_null(ops, unix_stream_connect);
set_to_dummy_if_null(ops, unix_may_send);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 34646f8..54542b4 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4723,6 +4723,12 @@ static int selinux_act_as_self(u32 *oldsecid)
return 0;
 }
 
+static int selinux_cachefiles_get_secid(u32 secid, u32 *modsecid)
+{
+   return security_transition_sid(secid, SECINITSID_KERNEL,
+  SECCLASS_PROCESS, modsecid);
+}
+
 #ifdef CONFIG_KEYS
 
 static int selinux_key_alloc(struct key *k, struct task_struct *tsk,
@@ -4910,6 +4916,7 @@ static struct security_operations selinux_ops = {
.set_fscreate_secid =   selinux_set_fscreate_secid,
.act_as_secid = selinux_act_as_secid,
.act_as_self =  selinux_act_as_self,
+   .cachefiles_get_secid = selinux_cachefiles_get_secid,
 
 .unix_stream_connect = selinux_socket_unix_stream_connect,
.unix_may_send =selinux_socket_unix_may_send,

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/16] FS-Cache: Provide an add_wait_queue_tail() function [try #3]

2007-08-10 Thread David Howells
Provide an add_wait_queue_tail() function to add a waiter to the back of a
wait queue instead of the front.

Signed-off-by: David Howells <[EMAIL PROTECTED]>
---

 include/linux/wait.h |1 +
 kernel/wait.c|   18 ++
 2 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 0e68628..4cae7db 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -118,6 +118,7 @@ static inline int waitqueue_active(wait_queue_head_t *q)
 #define is_sync_wait(wait) (!(wait) || ((wait)->private))
 
 extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * 
wait));
+extern void FASTCALL(add_wait_queue_tail(wait_queue_head_t *q, wait_queue_t * 
wait));
 extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, 
wait_queue_t * wait));
 extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * 
wait));
 
diff --git a/kernel/wait.c b/kernel/wait.c
index 444ddbf..7acc9cc 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -29,6 +29,24 @@ void fastcall add_wait_queue(wait_queue_head_t *q, 
wait_queue_t *wait)
 }
 EXPORT_SYMBOL(add_wait_queue);
 
+/**
+ * add_wait_queue_tail - Add a waiter to the back of a waitqueue
+ * @q: the wait queue to append the waiter to
+ * @wait: the waiter to be queued
+ *
+ * Add a waiter to the back of a waitqueue so that it gets woken up last.
+ */
+void fastcall add_wait_queue_tail(wait_queue_head_t *q, wait_queue_t *wait)
+{
+   unsigned long flags;
+
+   wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+   spin_lock_irqsave(&q->lock, flags);
+   __add_wait_queue_tail(q, wait);
+   spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(add_wait_queue_tail);
+
 void fastcall add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t 
*wait)
 {
unsigned long flags;

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/16] CacheFiles: Permit a process's create SID to be overridden [try #3]

2007-08-10 Thread David Howells
Make it possible for a process's file creation SID to be temporarily overridden
by CacheFiles so that files created in the cache have the right label attached.

Without this facility, files created in the cache will be given the current
file creation SID of whatever process happens to have invoked CacheFiles
indirectly by means of opening a netfs file at the time the cache file is
created.

Signed-Off-By: David Howells <[EMAIL PROTECTED]>
---

 include/linux/security.h |   39 +++
 security/dummy.c |   14 ++
 security/selinux/hooks.c |   20 
 3 files changed, 73 insertions(+), 0 deletions(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index c11dc8a..edd1677 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1147,6 +1147,15 @@ struct request_sock;
  * @secdata contains the security context.
  * @seclen contains the length of the security context.
  *
+ * @get_fscreate_secid:
+ * Get the current FS security ID.
+ * @secid points the location in which to return the security ID.
+ *
+ * @set_fscreate_secid:
+ * Set the current FS security ID.
+ * @secid contains the security ID to set.
+ * @oldsecid points the location in which to return the old security ID.
+ *
  * This is the main security structure.
  */
 struct security_operations {
@@ -1330,6 +1339,8 @@ struct security_operations {
int (*setprocattr)(struct task_struct *p, char *name, void *value, 
size_t size);
int (*secid_to_secctx)(u32 secid, char **secdata, u32 *seclen);
void (*release_secctx)(char *secdata, u32 seclen);
+   int (*get_fscreate_secid)(u32 *secid);
+   int (*set_fscreate_secid)(u32 secid, u32 *oldsecid);
 
 #ifdef CONFIG_SECURITY_NETWORK
int (*unix_stream_connect) (struct socket * sock,
@@ -2127,6 +2138,16 @@ static inline void security_release_secctx(char 
*secdata, u32 seclen)
return security_ops->release_secctx(secdata, seclen);
 }
 
+static inline int security_get_fscreate_secid(u32 *secid)
+{
+   return security_ops->get_fscreate_secid(secid);
+}
+
+static inline int security_set_fscreate_secid(u32 secid, u32 *oldsecid)
+{
+   return security_ops->set_fscreate_secid(secid, oldsecid);
+}
+
 /* prototypes */
 extern int security_init   (void);
 extern int register_security   (struct security_operations *ops);
@@ -2795,6 +2816,11 @@ static inline void securityfs_remove(struct dentry 
*dentry)
 {
 }
 
+static inline int security_to_secctx_secid(char *secdata, u32 seclen, u32 
*secid)
+{
+   return -EOPNOTSUPP;
+}
+
 static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 
*seclen)
 {
return -EOPNOTSUPP;
@@ -2803,6 +2829,19 @@ static inline int security_secid_to_secctx(u32 secid, 
char **secdata, u32 *secle
 static inline void security_release_secctx(char *secdata, u32 seclen)
 {
 }
+
+static inline int security_get_fscreate_secid(u32 *secid)
+{
+   *secid = 0;
+   return 0;
+}
+
+static inline int security_set_fscreate_secid(u32 secid, u32 *oldsecid)
+{
+   *oldsecid = 0;
+   return 0;
+}
+
 #endif /* CONFIG_SECURITY */
 
 #ifdef CONFIG_SECURITY_NETWORK
diff --git a/security/dummy.c b/security/dummy.c
index 19d813d..9e81edb 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -930,6 +930,18 @@ static void dummy_release_secctx(char *secdata, u32 seclen)
 {
 }
 
+static int dummy_get_fscreate_secid(u32 *secid)
+{
+   *secid = 0;
+   return 0;
+}
+
+static int dummy_set_fscreate_secid(u32 secid, u32 *oldsecid)
+{
+   *oldsecid = 0;
+   return 0;
+}
+
 #ifdef CONFIG_KEYS
 static inline int dummy_key_alloc(struct key *key, struct task_struct *ctx,
  unsigned long flags)
@@ -1084,6 +1096,8 @@ void security_fixup_ops (struct security_operations *ops)
set_to_dummy_if_null(ops, setprocattr);
set_to_dummy_if_null(ops, secid_to_secctx);
set_to_dummy_if_null(ops, release_secctx);
+   set_to_dummy_if_null(ops, get_fscreate_secid);
+   set_to_dummy_if_null(ops, set_fscreate_secid);
 #ifdef CONFIG_SECURITY_NETWORK
set_to_dummy_if_null(ops, unix_stream_connect);
set_to_dummy_if_null(ops, unix_may_send);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 6237933..f82a03d 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4661,6 +4661,24 @@ static void selinux_release_secctx(char *secdata, u32 
seclen)
kfree(secdata);
 }
 
+static int selinux_get_fscreate_secid(u32 *secid)
+{
+   struct task_security_struct *tsec = current->security;
+
+   *secid = tsec->create_sid;
+   return 0;
+}
+
+static int selinux_set_fscreate_secid(u32 secid, u32 *oldsecid)
+{
+   struct task_security_struct *tsec = current->security;
+   u32 oldsid = tsec->create_sid;
+
+   tsec->create_sid = secid;
+   *oldsecid = oldsid;
+   return 0;
+}
+
 #ifdef CO

[PATCH 10/16] CacheFiles: Add an act-as SID override in task_security_struct [try #3]

2007-08-10 Thread David Howells
Add an act-as SID to task_security_struct that is equivalent to fsuid/fsgid in
task_struct.  This permits a task to perform operations as if it is the
overriding SID, without changing its own SID as that might be needed to control
access to the process by ptrace, signals, /proc, etc.

This is useful for CacheFiles in that it allows CacheFiles to access the cache
files and directories using the cache's security context rather than the
security context of the process on whose behalf it is working, and in the
context of which it is running.

Signed-Off-By: David Howells <[EMAIL PROTECTED]>
---

 include/linux/security.h  |   36 
 security/dummy.c  |   14 +++
 security/selinux/exports.c|2 
 security/selinux/hooks.c  |  162 +++--
 security/selinux/include/objsec.h |1 
 security/selinux/selinuxfs.c  |2 
 security/selinux/xfrm.c   |6 +
 7 files changed, 156 insertions(+), 67 deletions(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index edd1677..194ef49 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1156,6 +1156,18 @@ struct request_sock;
  * @secid contains the security ID to set.
  * @oldsecid points the location in which to return the old security ID.
  *
+ * @act_as_secid:
+ * Set the security ID as which to act, returning the security ID as which
+ *  the process was previously acting.
+ * @secid contains the security ID to act as.
+ * @oldsecid points the location in which to return the displaced security 
ID.
+ *
+ * @act_as_self:
+ * Reset the security ID as which to act to be the same as the process's
+ *  owning security ID, and return the security ID as which the process was
+ *  previously acting.
+ * @oldsecid points the location in which to return the displaced security 
ID.
+ *
  * This is the main security structure.
  */
 struct security_operations {
@@ -1341,6 +1353,8 @@ struct security_operations {
void (*release_secctx)(char *secdata, u32 seclen);
int (*get_fscreate_secid)(u32 *secid);
int (*set_fscreate_secid)(u32 secid, u32 *oldsecid);
+   int (*act_as_secid)(u32 secid, u32 *oldsecid);
+   int (*act_as_self)(u32 *oldsecid);
 
 #ifdef CONFIG_SECURITY_NETWORK
int (*unix_stream_connect) (struct socket * sock,
@@ -2148,6 +2162,16 @@ static inline int security_set_fscreate_secid(u32 secid, 
u32 *oldsecid)
return security_ops->set_fscreate_secid(secid, oldsecid);
 }
 
+static inline int security_act_as_secid(u32 secid, u32 *oldsecid)
+{
+   return security_ops->act_as_secid(secid, oldsecid);
+}
+
+static inline int security_act_as_self(u32 *oldsecid)
+{
+   return security_ops->act_as_self(oldsecid);
+}
+
 /* prototypes */
 extern int security_init   (void);
 extern int register_security   (struct security_operations *ops);
@@ -2842,6 +2866,18 @@ static inline int security_set_fscreate_secid(u32 secid, 
u32 *oldsecid)
return 0;
 }
 
+static inline int security_act_as_secid(u32 secid, u32 *oldsecid)
+{
+   *oldsecid = 0;
+   return 0;
+}
+
+static inline u32 security_act_as_self(u32 *oldsecid)
+{
+   *oldsecid = 0;
+   return 0;
+}
+
 #endif /* CONFIG_SECURITY */
 
 #ifdef CONFIG_SECURITY_NETWORK
diff --git a/security/dummy.c b/security/dummy.c
index 9e81edb..1a0be85 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -942,6 +942,18 @@ static int dummy_set_fscreate_secid(u32 secid, u32 
*oldsecid)
return 0;
 }
 
+static int dummy_act_as_secid(u32 secid, u32 *oldsecid)
+{
+   *oldsecid = 0;
+   return 0;
+}
+
+static int dummy_act_as_self(u32 *oldsecid)
+{
+   *oldsecid = 0;
+   return 0;
+}
+
 #ifdef CONFIG_KEYS
 static inline int dummy_key_alloc(struct key *key, struct task_struct *ctx,
  unsigned long flags)
@@ -1098,6 +1110,8 @@ void security_fixup_ops (struct security_operations *ops)
set_to_dummy_if_null(ops, release_secctx);
set_to_dummy_if_null(ops, get_fscreate_secid);
set_to_dummy_if_null(ops, set_fscreate_secid);
+   set_to_dummy_if_null(ops, act_as_secid);
+   set_to_dummy_if_null(ops, act_as_self);
 #ifdef CONFIG_SECURITY_NETWORK
set_to_dummy_if_null(ops, unix_stream_connect);
set_to_dummy_if_null(ops, unix_may_send);
diff --git a/security/selinux/exports.c b/security/selinux/exports.c
index b6f9694..b559699 100644
--- a/security/selinux/exports.c
+++ b/security/selinux/exports.c
@@ -79,7 +79,7 @@ int selinux_relabel_packet_permission(u32 sid)
if (selinux_enabled) {
struct task_security_struct *tsec = current->security;
 
-   return avc_has_perm(tsec->sid, sid, SECCLASS_PACKET,
+   return avc_has_perm(tsec->actor_sid, sid, SECCLASS_PACKET,
PACKET__RELABELTO, NULL);
}
return 0;
diff --git a/security/selinux/h

[PATCH 07/16] CacheFiles: Permit the page lock state to be monitored [try #3]

2007-08-10 Thread David Howells
Add a function to install a monitor on the page lock waitqueue for a particular
page, thus allowing the page being unlocked to be detected.

This is used by CacheFiles to detect read completion on a page in the backing
filesystem so that it can then copy the data to the waiting netfs page.

Signed-Off-By: David Howells <[EMAIL PROTECTED]>
---

 include/linux/pagemap.h |5 +
 mm/filemap.c|   19 +++
 2 files changed, 24 insertions(+), 0 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index d1049b6..452fdcf 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -220,6 +220,11 @@ static inline void wait_on_page_fscache_write(struct page 
*page)
 extern void end_page_fscache_write(struct page *page);
 
 /*
+ * Add an arbitrary waiter to a page's wait queue
+ */
+extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter);
+
+/*
  * Fault a userspace page into pagetables.  Return non-zero on a fault.
  *
  * This assumes that two userspace pages are always sufficient.  That's
diff --git a/mm/filemap.c b/mm/filemap.c
index 5e419a2..c60c24e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -518,6 +518,25 @@ void fastcall wait_on_page_bit(struct page *page, int 
bit_nr)
 EXPORT_SYMBOL(wait_on_page_bit);
 
 /**
+ * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
+ * @page: Page defining the wait queue of interest
+ * @waiter: Waiter to add to the queue
+ *
+ * Add an arbitrary @waiter to the wait queue for the nominated @page.
+ */
+void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
+{
+   wait_queue_head_t *q = page_waitqueue(page);
+   unsigned long flags;
+
+   spin_lock_irqsave(&q->lock, flags);
+   __add_wait_queue(q, waiter);
+   spin_unlock_irqrestore(&q->lock, flags);
+}
+
+EXPORT_SYMBOL_GPL(add_page_wait_queue);
+
+/**
  * unlock_page - unlock a locked page
  * @page: the page
  *

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/16] FS-Cache: Release page->private after failed readahead [try #3]

2007-08-10 Thread David Howells
The attached patch causes read_cache_pages() to release page-private data on a
page for which add_to_page_cache() fails or the filler function fails. This
permits pages with caching references associated with them to be cleaned up.

The invalidatepage() address space op is called (indirectly) to do the honours.

Signed-Off-By: David Howells <[EMAIL PROTECTED]>
---

 mm/readahead.c |   40 ++--
 1 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/mm/readahead.c b/mm/readahead.c
index 39bf45d..12d1378 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 {
@@ -51,6 +52,41 @@ EXPORT_SYMBOL_GPL(file_ra_state_init);
 
 #define list_to_page(head) (list_entry((head)->prev, struct page, lru))
 
+/*
+ * see if a page needs releasing upon read_cache_pages() failure
+ * - the caller of read_cache_pages() may have set PG_private before calling,
+ *   such as the NFS fs marking pages that are cached locally on disk, thus we
+ *   need to give the fs a chance to clean up in the event of an error
+ */
+static void read_cache_pages_invalidate_page(struct address_space *mapping,
+struct page *page)
+{
+   if (PagePrivate(page)) {
+   if (TestSetPageLocked(page))
+   BUG();
+   page->mapping = mapping;
+   do_invalidatepage(page, 0);
+   page->mapping = NULL;
+   unlock_page(page);
+   }
+   page_cache_release(page);
+}
+
+/*
+ * release a list of pages, invalidating them first if need be
+ */
+static void read_cache_pages_invalidate_pages(struct address_space *mapping,
+ struct list_head *pages)
+{
+   struct page *victim;
+
+   while (!list_empty(pages)) {
+   victim = list_to_page(pages);
+   list_del(&victim->lru);
+   read_cache_pages_invalidate_page(mapping, victim);
+   }
+}
+
 /**
  * read_cache_pages - populate an address space with some pages & start reads 
against them
  * @mapping: the address_space
@@ -74,14 +110,14 @@ int read_cache_pages(struct address_space *mapping, struct 
list_head *pages,
page = list_to_page(pages);
list_del(&page->lru);
if (add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) {
-   page_cache_release(page);
+   read_cache_pages_invalidate_page(mapping, page);
continue;
}
ret = filler(data, page);
if (!pagevec_add(&lru_pvec, page))
__pagevec_lru_add(&lru_pvec);
if (ret) {
-   put_pages_list(pages);
+   read_cache_pages_invalidate_pages(mapping, pages);
break;
}
task_io_account_read(PAGE_CACHE_SIZE);

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/16] CacheFiles: Add a hook to write a single page of data to an inode [try #3]

2007-08-10 Thread David Howells
Add an address space operation to write one single page of data to an inode at
a page-aligned location (thus permitting the implementation to be highly
optimised).

This is used by CacheFiles to store the contents of netfs pages into their
backing file pages.

Supply a generic implementation for this that uses the prepare_write() and
commit_write() address_space operations to bound a copy directly into the page
cache.

Hook the Ext2 and Ext3 operations to the generic implementation.

Signed-Off-By: David Howells <[EMAIL PROTECTED]>
---

 fs/ext2/inode.c|2 +
 fs/ext3/inode.c|3 ++
 include/linux/fs.h |7 
 mm/filemap.c   |   95 
 4 files changed, 107 insertions(+), 0 deletions(-)

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 0079b2c..b3e4b50 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -695,6 +695,7 @@ const struct address_space_operations ext2_aops = {
.direct_IO  = ext2_direct_IO,
.writepages = ext2_writepages,
.migratepage= buffer_migrate_page,
+   .write_one_page = generic_file_buffered_write_one_page,
 };
 
 const struct address_space_operations ext2_aops_xip = {
@@ -713,6 +714,7 @@ const struct address_space_operations ext2_nobh_aops = {
.direct_IO  = ext2_direct_IO,
.writepages = ext2_writepages,
.migratepage= buffer_migrate_page,
+   .write_one_page = generic_file_buffered_write_one_page,
 };
 
 /*
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index de4e316..93809eb 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1713,6 +1713,7 @@ static const struct address_space_operations 
ext3_ordered_aops = {
.releasepage= ext3_releasepage,
.direct_IO  = ext3_direct_IO,
.migratepage= buffer_migrate_page,
+   .write_one_page = generic_file_buffered_write_one_page,
 };
 
 static const struct address_space_operations ext3_writeback_aops = {
@@ -1727,6 +1728,7 @@ static const struct address_space_operations 
ext3_writeback_aops = {
.releasepage= ext3_releasepage,
.direct_IO  = ext3_direct_IO,
.migratepage= buffer_migrate_page,
+   .write_one_page = generic_file_buffered_write_one_page,
 };
 
 static const struct address_space_operations ext3_journalled_aops = {
@@ -1740,6 +1742,7 @@ static const struct address_space_operations 
ext3_journalled_aops = {
.bmap   = ext3_bmap,
.invalidatepage = ext3_invalidatepage,
.releasepage= ext3_releasepage,
+   .write_one_page = generic_file_buffered_write_one_page,
 };
 
 void ext3_set_aops(struct inode *inode)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6bf1395..1b1f288 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -433,6 +433,11 @@ struct address_space_operations {
int (*migratepage) (struct address_space *,
struct page *, struct page *);
int (*launder_page) (struct page *);
+   /* write the contents of the source page over the page at the specified
+* index in the target address space (the source page does not need to
+* be related to the target address space) */
+   int (*write_one_page)(struct address_space *, pgoff_t, struct page *);
+
 };
 
 struct backing_dev_info;
@@ -1669,6 +1674,8 @@ extern ssize_t generic_file_direct_write(struct kiocb *, 
const struct iovec *,
unsigned long *, loff_t, loff_t *, size_t, size_t);
 extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec 
*,
unsigned long, loff_t, loff_t *, size_t, ssize_t);
+extern int generic_file_buffered_write_one_page(struct address_space *,
+   pgoff_t, struct page *);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, 
loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t 
len, loff_t *ppos);
 extern void do_generic_mapping_read(struct address_space *mapping,
diff --git a/mm/filemap.c b/mm/filemap.c
index 7b96487..5e419a2 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2032,6 +2032,101 @@ zero_length_segment:
 }
 EXPORT_SYMBOL(generic_file_buffered_write);
 
+/**
+ * generic_file_buffered_write_one_page - Write a single page of data to an
+ * inode
+ * @mapping - The address space of the target inode
+ * @index - The target page in the target inode to fill
+ * @source - The data to write into the target page
+ *
+ * Write the data from the source page to the page in the nominated address
+ * space at the @index specified.  Note that the file will not be extended if
+ * the page crosses the EOF marker, in which case only the first part of the
+ * page will be written.
+ *
+ * The @source page does not need to have any association with the file or the
+ * target page offset.
+ */
+int generic_fil

[PATCH 05/16] CacheFiles: Add missing copy_page export for ia64 [try #3]

2007-08-10 Thread David Howells
This one-line patch fixes the missing export of copy_page introduced
by the cachefile patches.  This patch is not yet upstream, but is required
for cachefile on ia64.  It will be pushed upstream when cachefile goes
upstream.

Signed-off-by: Prarit Bhargava <[EMAIL PROTECTED]>
Signed-Off-By: David Howells <[EMAIL PROTECTED]>
---

 arch/ia64/kernel/ia64_ksyms.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index bd17190..20c3546 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -43,6 +43,7 @@ EXPORT_SYMBOL(__do_clear_user);
 EXPORT_SYMBOL(__strlen_user);
 EXPORT_SYMBOL(__strncpy_from_user);
 EXPORT_SYMBOL(__strnlen_user);
+EXPORT_SYMBOL(copy_page);
 
 /* from arch/ia64/lib */
 extern void __divsi3(void);

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/16] FS-Cache: Recruit a couple of page flags for cache management [try #3]

2007-08-10 Thread David Howells
Recruit a couple of page flags to aid in cache management.  The following extra
flags are defined:

 (1) PG_fscache (PG_owner_priv_2)

 The marked page is backed by a local cache and is pinning resources in the
 cache driver.

 (2) PG_fscache_write (PG_owner_priv_3)

 The marked page is being written to the local cache.  The page may not be
 modified whilst this is in progress.

If PG_fscache is set, then things that checked for PG_private will now also
check for that.  This includes things like truncation and page invalidation.
The function page_has_private() has been added to detect this.

Signed-off-by: David Howells <[EMAIL PROTECTED]>
---

 fs/splice.c|2 +-
 include/linux/page-flags.h |   30 +-
 include/linux/pagemap.h|   11 +++
 mm/filemap.c   |   16 
 mm/migrate.c   |2 +-
 mm/page_alloc.c|3 +++
 mm/readahead.c |9 +
 mm/swap.c  |4 ++--
 mm/swap_state.c|4 ++--
 mm/truncate.c  |   10 +-
 mm/vmscan.c|2 +-
 11 files changed, 76 insertions(+), 17 deletions(-)

diff --git a/fs/splice.c b/fs/splice.c
index c010a72..ae4f5b7 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -58,7 +58,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info 
*pipe,
 */
wait_on_page_writeback(page);
 
-   if (PagePrivate(page))
+   if (page_has_private(page))
try_to_release_page(page, GFP_KERNEL);
 
/*
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 209d3a4..eaf9854 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -83,19 +83,24 @@
 #define PG_private 11  /* If pagecache, has fs-private data */
 
 #define PG_writeback   12  /* Page is under writeback */
+#define PG_owner_priv_213  /* Owner use. If pagecache, fs 
may use */
 #define PG_compound14  /* Part of a compound page */
 #define PG_swapcache   15  /* Swap page: swp_entry_t in private */
 
 #define PG_mappedtodisk16  /* Has blocks allocated on-disk 
*/
 #define PG_reclaim 17  /* To be reclaimed asap */
+#define PG_owner_priv_318  /* Owner use. If pagecache, fs 
may use */
 #define PG_buddy   19  /* Page is free, on buddy lists */
 
 /* PG_readahead is only used for file reads; PG_reclaim is only for writes */
 #define PG_readahead   PG_reclaim /* Reminder to do async read-ahead */
 
-/* PG_owner_priv_1 users should have descriptive aliases */
+/* PG_owner_priv_1/2/3 users should have descriptive aliases */
 #define PG_checked PG_owner_priv_1 /* Used by some filesystems */
 #define PG_pinned  PG_owner_priv_1 /* Xen pinned pagetable */
+#define PG_fscache PG_owner_priv_2 /* Backed by local cache */
+#define PG_fscache_write   PG_owner_priv_3 /* Writing to local cache */
+
 
 #if (BITS_PER_LONG > 32)
 /*
@@ -199,6 +204,18 @@ static inline void SetPageUptodate(struct page *page)
 #define TestClearPageWriteback(page) test_and_clear_bit(PG_writeback,  \
&(page)->flags)
 
+#define PageFsCache(page)  test_bit(PG_fscache, &(page)->flags)
+#define SetPageFsCache(page)   set_bit(PG_fscache, &(page)->flags)
+#define ClearPageFsCache(page) clear_bit(PG_fscache, &(page)->flags)
+#define TestSetPageFsCache(page) test_and_set_bit(PG_fscache, &(page)->flags)
+#define TestClearPageFsCache(page) test_and_clear_bit(PG_fscache, 
&(page)->flags)
+
+#define PageFsCacheWrite(page) test_bit(PG_fscache_write, 
&(page)->flags)
+#define SetPageFsCacheWrite(page)  set_bit(PG_fscache_write, 
&(page)->flags)
+#define ClearPageFsCacheWrite(page)clear_bit(PG_fscache_write, 
&(page)->flags)
+#define TestSetPageFsCacheWrite(page)  test_and_set_bit(PG_fscache_write, 
&(page)->flags)
+#define TestClearPageFsCacheWrite(page)
test_and_clear_bit(PG_fscache_write, &(page)->flags)
+
 #define PageBuddy(page)test_bit(PG_buddy, &(page)->flags)
 #define __SetPageBuddy(page)   __set_bit(PG_buddy, &(page)->flags)
 #define __ClearPageBuddy(page) __clear_bit(PG_buddy, &(page)->flags)
@@ -272,4 +289,15 @@ static inline void set_page_writeback(struct page *page)
test_set_page_writeback(page);
 }
 
+/**
+ * page_has_private - Determine if page has private stuff
+ * @page: The page to be checked
+ *
+ * Determine if a page has private stuff, indicating that release routines
+ * should be invoked upon it.
+ */
+#define page_has_private(page) \
+   ((page)->flags & ((1 << PG_private) |   \
+ (1 << PG_fscache)))
+
 #endif /* PAGE_FLAGS_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
ind

[PATCH 00/16] Permit filesystem local caching [try #3]

2007-08-10 Thread David Howells


These patches add local caching for network filesystems such as NFS and AFS.

FS-Cache now runs fully asynchronously as required by Trond Myklebust for NFS.

--
Changes:
[try #3]:

 (*) Added missing file to CacheFiles patch.

 (*) Made new security functions return errors and pass actual return data via
 argument pointer.

 (*) Cleaned up NFS patch.

 (*) The 'fsc' flag must now be passed to NFS mount by the string options.

 (*) Split the NFS patch into three as requested by Trond.

[try #2]:

 (*) The CacheFiles module no longer accepts directory fds in its cull and
 inuse commands from cachefilesd.  Instead it uses the current working
 directory of the calling process as the basis for looking up the object.
 As a corollary to this, fget_light() no longer needs to be exported.

--
A tarball of the patches is available at:


http://people.redhat.com/~dhowells/fscache/patches/nfs+fscache-22.tar.bz2


To use this version of CacheFiles, cachefilesd-0.9 is also required.  It
is available as an SRPM:

http://people.redhat.com/~dhowells/fscache/cachefilesd-0.9-1.fc7.src.rpm

Or as individual bits:

http://people.redhat.com/~dhowells/fscache/cachefilesd-0.9.tar.bz2
http://people.redhat.com/~dhowells/fscache/cachefilesd.fc
http://people.redhat.com/~dhowells/fscache/cachefilesd.if
http://people.redhat.com/~dhowells/fscache/cachefilesd.te
http://people.redhat.com/~dhowells/fscache/cachefilesd.spec

The .fc, .if and .te files are for manipulating SELinux.

David
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 4/4] VFS: allow filesystem to override mknod capability checks

2007-08-10 Thread Serge E. Hallyn
Quoting Miklos Szeredi ([EMAIL PROTECTED]):
> > > From: Miklos Szeredi <[EMAIL PROTECTED]>
> > > 
> > > Add a new filesystem flag, that results in the VFS not checking if the
> > > current process has enough privileges to do an mknod().
> > > 
> > > This is needed on filesystems, where an unprivileged user may be able
> > > to create a device node, without causing security problems.
> > > 
> > > One such example is "mountlo" a loopback mount utility implemented
> > > with fuse and UML, which runs as an unprivileged userspace process.
> > > In this case the user does in fact have the right to create device
> > > nodes within the filesystem image, as long as the user has write
> > > access to the image.  Since the filesystem is mounted with "nodev",
> > > adding device nodes is not a security concern.
> > 
> > Could we enforce at do_new_mount() that if
> > type->fs_flags&FS_MKNOD_CHECKS_PERM then mnt_flags |= MS_NODEV?
> 
> Well, the problem with that is, there will be fuse filesystems which
> will want devices to work

Crud, sorry, I forgot all fuse filesystems will have the same fs_flags.

> and for those the capability checks will be
> reenabled inside ->mknod().  In fact, for backward compatibility all
> filesystems will have the mknod checks, except ones which explicitly
> request to turn it off.
> 
> Since unprivileged fuse mounts always have "nodev", the only way

Ah yes, I'd forgotten that we do if (!capable(mknod)) mnt_flags |= MNT_NODEV

No objections then anyway.  Thanks for indulging me :)

> security could be screwed up, is if a filesystem running with
> privileges disabled the mknod checks.
> 
> I will probably add some safety guards against that into the fuse
> library, but of course there's no way to stop a privileged user from
> screwing up security anyway.

Agreed.

> If for example there's a loop mount, where the disk image file is
> writable by a user, and root mounts it without "nodev", the user can
> still create device nodes (by modifying the image) even if the mknod
> checks are enabled.

thanks,
-serge

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 14/14] NFS: Use local caching [try #2]

2007-08-10 Thread David Howells
Trond Myklebust <[EMAIL PROTECTED]> wrote:

> > > Dang, that's a lot of inlines... AFAICS, approx half of fs/nfs/fscache.h
> > > should really be moved into fscache.c.
> > 
> > If you wish.  It seems a shame since a lot of them have only one caller.
> 
> ...however it also forces you to export a lot of stuff which is really
> private to fscache.c (the atomics etc).

The atomics are actually a bad example.  These are referred to directly by part
of the table in fs/nfs/sysctl.c.  Is there a better way of exporting
statistics than through /proc/sys/ files?

David
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 11/14] CacheFiles: Permit an inode's security ID to be obtained [try #2]

2007-08-10 Thread David Howells
Casey Schaufler <[EMAIL PROTECTED]> wrote:

> Grumble. Yet another thing to undo in the near future. I still
> hope to suggest what I would consider a viable alternative "soon".

Use a struct key with the overrides attached?  The key can be generated by
SELinux or whatever module is there.

David
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html