Re: [PATCH 6/11] eCryptfs: Update metadata read/write functions
On Wed, Sep 19, 2007 at 10:48:17PM -0700, Andrew Morton wrote: > On Mon, 17 Sep 2007 16:48:44 -0500 Michael Halcrow <[EMAIL PROTECTED]> wrote: > > + if ((rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, > > checkpatch missed the assignment-in-an-if here. Fix an assignment-in-an-if. Signed-off-by: Michael Halcrow <[EMAIL PROTECTED]> --- diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 4bf1a95..b3795f6 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1306,8 +1306,9 @@ ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat, int header_pages; int rc; - if ((rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, page_virt, - 0, PAGE_CACHE_SIZE))) { + rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, page_virt, + 0, PAGE_CACHE_SIZE); + if (rc) { printk(KERN_ERR "%s: Error attempting to write header " "information to lower file; rc = [%d]\n", __FUNCTION__, rc); - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/11] eCryptfs: read_write.c routines
On Wed, Sep 19, 2007 at 10:38:50PM -0700, Andrew Morton wrote: > > + offset = (page_for_lower->index << PAGE_CACHE_SHIFT) + offset_in_page; > > bug. You need to cast page.index to loff_t before shifting. > > I'd fix it on the spot, but this would be a good time to review the > whole patchset and perhaps the whole fs for this easy-to-do, > hard-to-find bug. Update data types and add casts in order to avoid potential overflow issues. Signed-off-by: Michael Halcrow <[EMAIL PROTECTED]> --- diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 5d27cf9..4bf1a95 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -149,7 +149,7 @@ out: * ecryptfs_derive_iv * @iv: destination for the derived iv vale * @crypt_stat: Pointer to crypt_stat struct for the current inode - * @offset: Offset of the page whose's iv we are to derive + * @offset: Offset of the extent whose IV we are to derive * * Generate the initialization vector from the given root IV and page * offset. @@ -157,7 +157,7 @@ out: * Returns zero on success; non-zero on error. */ static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, - pgoff_t offset) + loff_t offset) { int rc = 0; char dst[MD5_DIGEST_SIZE]; @@ -173,7 +173,7 @@ static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, * hashing business. 
-Halcrow */ memcpy(src, crypt_stat->root_iv, crypt_stat->iv_bytes); memset((src + crypt_stat->iv_bytes), 0, 16); - snprintf((src + crypt_stat->iv_bytes), 16, "%ld", offset); + snprintf((src + crypt_stat->iv_bytes), 16, "%lld", offset); if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk(KERN_DEBUG, "source:\n"); ecryptfs_dump_hex(src, (crypt_stat->iv_bytes + 16)); @@ -384,11 +384,11 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, struct page *page, unsigned long extent_offset) { - unsigned long extent_base; + loff_t extent_base; char extent_iv[ECRYPTFS_MAX_IV_BYTES]; int rc; - extent_base = (page->index + extent_base = (((loff_t)page->index) * (PAGE_CACHE_SIZE / crypt_stat->extent_size)); rc = ecryptfs_derive_iv(extent_iv, crypt_stat, (extent_base + extent_offset)); @@ -492,8 +492,9 @@ int ecryptfs_encrypt_page(struct page *page) goto out; } ecryptfs_lower_offset_for_extent( - &offset, ((page->index * (PAGE_CACHE_SIZE - / crypt_stat->extent_size)) + &offset, ((((loff_t)page->index) + * (PAGE_CACHE_SIZE + / crypt_stat->extent_size)) + extent_offset), crypt_stat); rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, offset, crypt_stat->extent_size); @@ -515,11 +516,11 @@ static int ecryptfs_decrypt_extent(struct page *page, struct page *enc_extent_page, unsigned long extent_offset) { - unsigned long extent_base; + loff_t extent_base; char extent_iv[ECRYPTFS_MAX_IV_BYTES]; int rc; - extent_base = (page->index + extent_base = (((loff_t)page->index) * (PAGE_CACHE_SIZE / crypt_stat->extent_size)); rc = ecryptfs_derive_iv(extent_iv, crypt_stat, (extent_base + extent_offset)); @@ -1320,7 +1321,7 @@ ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat, while (current_header_page < header_pages) { loff_t offset; - offset = (current_header_page << PAGE_CACHE_SHIFT); + offset = (((loff_t)current_header_page) << PAGE_CACHE_SHIFT); if ((rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, page_virt, offset, PAGE_CACHE_SIZE))) { diff 
--git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index c6a8a33..4eb09c1 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -127,7 +127,8 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, int rc = 0; while (extent_num_in_page < num_extents_per_page) { - loff_t view_extent_num = ((page->index * num_extents_per_page) + loff_t view_extent_num = ((((loff_t)page->index) + * num_extents_per_page) + extent_num_in_page); if (view_extent_num < crypt_stat->num_header_extents_at_front) { @@ -418,
Re: [00/41] Large Blocksize Support V7 (adds memmap support)
On Fri, 21 Sep 2007, Hugh Dickins wrote: > I've found some fixes needed on top of your Large Blocksize Support > patches: I'll send those to you in a moment. Looks like you didn't > try much swapping! yup. Thanks for looking at it. > > I only managed to get ext2 working with larger blocksizes: > reiserfs -b 8192 wouldn't mount ("reiserfs_fill_super: can not find > reiserfs on /dev/sdb1"); ext3 gave me mysterious errors ("JBD: tar > wants too many credits", even after adding JBD patches that you > turned out to be depending on); and I didn't try ext4 or xfs > (I'm guessing the latter has been quite well tested ;) Yes, there were issues with the first releases of the JBD patches. The current crop in mm is fine but much of that may have bypassed this list. - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 2.6.22.6: kernel BUG at fs/locks.c:171
On Fri, 2007-09-14 at 07:22 +1000, Nick Piggin wrote: > On Friday 14 September 2007 16:02, Soeren Sonnenburg wrote: > > On Thu, 2007-09-13 at 09:51 +1000, Nick Piggin wrote: > > > On Thursday 13 September 2007 19:20, Soeren Sonnenburg wrote: > > > > Dear all, > > > > > > > > I've just seen this in dmesg on a AMD K7 / kernel 2.6.22.6 machine > > > > (config attached). > > > > > > > > Any ideas / which further information needed ? > > > > > > Thanks for the report. Is it reproduceable? It seems like the > > > locks_free_lock call that's oopsing is coming from __posix_lock_file. > > > The actual function looks fine, but the lock being freed could have > > > been corrupted if there was slab corruption, or a hardware corruption. > > > > > > You could: try running memtest86+ overnight. And try the following > > > patch and turn on slab debugging then try to reproduce the problem. > > > > OK so far I've run memtest86+ 1.40 from freedos for 8 hrs (v1.70 hung on > > startup) - nothing. > > Thanks. > > > Could this corruption be caused by a pci card/driver? I am asking as I > > am using a new dvb-t card (asus p7131) and the oops happened after 5 or > > 6 days of uptime just about a day after watching some movie (very bad > > reception/lots of errors). > > It could be caused by that, definitely. slab debugging plus my earlier > patch may help to narrow it down. (or stress testing with / without the > dvb card in action). OK, it is the dvb card. I have 1 week of uptime now without any errors. Only change is the dvb driver (saa7146) not loaded. :( Soeren - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [00/41] Large Blocksize Support V7 (adds memmap support)
On Sun, Sep 23, 2007 at 08:56:39AM +0200, Goswin von Brederlow wrote: > As a user I know it because I didn't put a kernel source into /tmp. A > program can't reasonably know that. Various apps require you (admin/user) to tune the size of their caches. Seems like you never tried to set up a database, oh well. > Xen has its own memory pool and can quite aggressively reclaim memory > from dom0 when needed. I just meant to say that the number in The whole point is if there's not enough ram of course... this is why you should check. > /proc/meminfo can change in a second so it is not much use knowing > what it said last minute. The numbers will change depending on what's running on your system. It's up to you to know plus I normally keep vmstat monitored in the background to see how the cache/free levels change over time. Those numbers are worthless if they could be fragmented... > I would kill any program that does that to find out how much free ram > the system has. The admin should do that if he's unsure, not a program of course! - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 1/2] VFS: new fgetattr() file operation
> > But it has various drawbacks, like rmdir doesn't work if there are > > open files within an otherwise empty directory. > > > > I'd happily accept suggestions on how to deal with this differently. > > NFS has that problem because it really has to sillyrename into the same > directory. I don't see that ssh/sftp needs to do that. Instead it can > sillyrename anywhere in the filesystem. I don't think it can. How can we reliably find another directory that is writable by the user? Miklos - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 1/2] VFS: new fgetattr() file operation
> On Mon, Sep 24, 2007 at 03:18:10PM +0200, Miklos Szeredi wrote: > > > Or not support such a broken protocol at all. > > > > Wonder what people would say if we removed support for NFSv[23]. > > > > Just because a protocol does not support "perfect" UNIX semantics, it > > doesn't mean it's broken. By that standard almost all network > > filesystem protocols are severely broken. > > Well, they are broken by these and other standards. At least nfs and > cifs maintainers do the workarounds for this brokenness where they belong. And my patch is not working around a problem, rather solving a problem in a correct way. Let me summarise it: There are valid reasons we have fstat() in addition to stat/lstat. For example we want to protect against races involving unlink/rename on an open file. Say I want to implement a network filesystem with a pure unprivileged userspace server (this is basically what sshfs does). I want my filesystem client implementation to keep all these advantages of fstat(). So how can I do that? There's a simple way: implement this operation with fstat() on the server. I get all the advantages on the remote system automatically. But for that the filesystem needs to have the open file that the fstat() on the client was performed on. It's that simple. There are really no ugly hacks going on behind the scenes. It's just that we do want to delegate some properties of this operation onto the server, and the simplest and best implementation is to just let the filesystem have the information it needs. Why is that such a big problem? Miklos - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 1/2] VFS: new fgetattr() file operation
> But it has various drawbacks, like rmdir doesn't work if there are > open files within an otherwise empty directory. > > I'd happily accept suggestions on how to deal with this differently. NFS has that problem because it really has to sillyrename into the same directory. I don't see that ssh/sftp needs to do that. Instead it can sillyrename anywhere in the filesystem. Alan - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 1/2] VFS: new fgetattr() file operation
On Mon, Sep 24, 2007 at 03:18:10PM +0200, Miklos Szeredi wrote: > > Or not support such a broken protocol at all. > > Wonder what people would say if we removed support for NFSv[23]. > > Just because a protocol does not support "perfect" UNIX semantics, it > doesn't mean it's broken. By that standard almost all network > filesystem protocols are severely broken. Well, they are broken by these and other standards. At least nfs and cifs maintainers do the workarounds for this brokenness where they belong. - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 1/2] VFS: new fgetattr() file operation
> On Mon, Sep 24, 2007 at 03:06:06PM +0200, Miklos Szeredi wrote: > > A file isn't deleted while there are still links or open files > > referring to it. So getting the attributes for a file with nlink==0 is > > perfectly valid while the file is still open. > > Is it? Why not just pretend that the attributes are wiped when the file > is deleted. You mean "when finally unlinked"? Delete happens when the file is closed. > Effectively, they are, since they can't affect anything. Sure it can. It may be open on the server as well. > > If a network filesystem protocol can't handle operations (be it data > > or metadata) on an unlinked file, we must do sillyrenaming, so that > > the file is not actually unlinked. > > Or you could call getattr right before you unlink and cache the result > in the client. The file can still be modified after being unlinked. And even if we did this caching thing and modify the attributes when the file is modified, it would not deal with access on the remote end, and would be much more complex than the other alternatives. Miklos - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 1/2] VFS: new fgetattr() file operation
> > If a network filesystem protocol can't handle operations (be it data > > or metadata) on an unlinked file, we must do sillyrenaming, so that > > the file is not actually unlinked. > > Or not support such a broken protocol at all. Wonder what people would say if we removed support for NFSv[23]. Just because a protocol does not support "perfect" UNIX semantics, it doesn't mean it's broken. By that standard almost all network filesystem protocols are severely broken. Miklos - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 1/2] VFS: new fgetattr() file operation
On Mon, Sep 24, 2007 at 03:06:06PM +0200, Miklos Szeredi wrote: > A file isn't deleted while there are still links or open files > referring to it. So getting the attributes for a file with nlink==0 is > perfectly valid while the file is still open. Is it? Why not just pretend that the attributes are wiped when the file is deleted. Effectively, they are, since they can't affect anything. > If a network filesystem protocol can't handle operations (be it data > or metadata) on an unlinked file, we must do sillyrenaming, so that > the file is not actually unlinked. Or you could call getattr right before you unlink and cache the result in the client. -- Intel are signing my paycheques ... these opinions are still mine "Bill, look, we understand that you're interested in selling us this operating system, but compare it to ours. We can't possibly take such a retrograde step." - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 1/2] VFS: new fgetattr() file operation
On Mon, Sep 24, 2007 at 03:06:06PM +0200, Miklos Szeredi wrote: > If a network filesystem protocol can't handle operations (be it data > or metadata) on an unlinked file, we must do sillyrenaming, so that > the file is not actually unlinked. Or not support such a broken protocol at all. - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 1/2] VFS: new fgetattr() file operation
> > > and if that means adding silly rename support so be it. > > > > That's what is done currently. > > > > But it has various drawbacks, like rmdir doesn't work if there are > > open files within an otherwise empty directory. > > > > I'd happily accept suggestions on how to deal with this differently. > > Only sillyrename files with nlink > 1? I don't see how attributes can > change anything for a deleted file. I don't quite understand your suggestion. A file isn't deleted while there are still links or open files referring to it. So getting the attributes for a file with nlink==0 is perfectly valid while the file is still open. If a network filesystem protocol can't handle operations (be it data or metadata) on an unlinked file, we must do sillyrenaming, so that the file is not actually unlinked. Miklos - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 1/2] VFS: new fgetattr() file operation
On Mon, Sep 24, 2007 at 02:48:08PM +0200, Miklos Szeredi wrote: > > and if that means adding silly rename support so be it. > > That's what is done currently. > > But it has various drawbacks, like rmdir doesn't work if there are > open files within an otherwise empty directory. > > I'd happily accept suggestions on how to deal with this differently. Only sillyrename files with nlink > 1? I don't see how attributes can change anything for a deleted file. -- Intel are signing my paycheques ... these opinions are still mine "Bill, look, we understand that you're interested in selling us this operating system, but compare it to ours. We can't possibly take such a retrograde step." - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 1/2] VFS: new fgetattr() file operation
> On Mon, Sep 24, 2007 at 02:24:54PM +0200, Miklos Szeredi wrote: > > Thanks to everyone for the feedback. Here's two of the VFS patches > > reworked according to comments. I also plan to rework the setattr() > > patch accordingly and perhaps the xattr patch, although that is the > > lowest priority. > > > > Christoph, are these OK with you in this form? > > Not at all. Attribute operations like this have no business at all > looking at the struct file. Please fix your dreaded filesystem to > implement proper unix semantics instead, It's a fixed protocol, with servers installed on millions of servers. The protocol is called SFTP and the server is part of the OpenSSH package. There's nothing I can change there. And even if I could change the protocol, it's impossible to implement full UNIX semantics with a userspace server. Please think a bit about it. > and if that means adding silly rename support so be it. That's what is done currently. But it has various drawbacks, like rmdir doesn't work if there are open files within an otherwise empty directory. I'd happily accept suggestions on how to deal with this differently. Thanks, Miklos - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/2] VFS: allow filesystem to override mknod capability checks
On Mon, Sep 24, 2007 at 02:25:54PM +0200, Miklos Szeredi wrote: > From: Miklos Szeredi <[EMAIL PROTECTED]> > > Add a new super block flag, that results in the VFS not checking if > the current process has enough privileges to do an mknod(). > > If this flag is set, all mounts for this super block will have the > "nodev" flag implied. > > This is needed on filesystems, where an unprivileged user may be able > to create a device node, without causing security problems. > > One such example is "mountlo" a loopback mount utility implemented > with fuse and UML, which runs as an unprivileged userspace process. > In this case the user does in fact have the right to create device > nodes within the filesystem image, as long as the user has write > access to the image. Since the filesystem is mounted with "nodev", > adding device nodes is not a security concern. This one looks okay, but I'd prefer to not put it in until we actually have proper unprivileged mounts. - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 1/2] VFS: new fgetattr() file operation
On Mon, Sep 24, 2007 at 02:24:54PM +0200, Miklos Szeredi wrote: > Thanks to everyone for the feedback. Here's two of the VFS patches > reworked according to comments. I also plan to rework the setattr() > patch accordingly and perhaps the xattr patch, although that is the > lowest priority. > > Christoph, are these OK with you in this form? Not at all. Attribute operations like this have no business at all looking at the struct file. Please fix your dreaded filesystem to implement proper unix semantics instead, and if that means adding silly rename support so be it. - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [00/41] Large Blocksize Support V7 (adds memmap support)
On Sep 23, 2007, at 02:22:12, Goswin von Brederlow wrote: [EMAIL PROTECTED] (Mel Gorman) writes: On (16/09/07 23:58), Goswin von Brederlow didst pronounce: But when you already have say 10% of the ram in mixed groups then it is a sign the external fragmentation happens and some time should be spent on moving movable objects. I'll play around with it on the side and see what sort of results I get. I won't be pushing anything any time soon in relation to this though. For now, I don't intend to fiddle more with grouping pages by mobility for something that may or may not be of benefit to a feature that hasn't been widely tested with what exists today. I watched the videos you posted. A nice and quite clear improvement with and without your logic. Kudos. When you play around with it may I suggest a change to the display of the memory information. I think it would be valuable to use a Hilbert Curve to arrange the pages into pixels. Like this: # # 0 3 # # ### 1 2 ### ### 0 1 E F # # ### ### 3 2 D C # # # ### # 4 7 8 B # # # # ### ### 5 6 9 A Here's an excellent example of a 0-255 numbered Hilbert curve used to enumerate the various top-level allocations of IPv4 space: http://xkcd.com/195/ Cheers, Kyle Moffett - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 2/2] VFS: allow filesystem to override mknod capability checks
From: Miklos Szeredi <[EMAIL PROTECTED]> Add a new super block flag, that results in the VFS not checking if the current process has enough privileges to do an mknod(). If this flag is set, all mounts for this super block will have the "nodev" flag implied. This is needed on filesystems, where an unprivileged user may be able to create a device node, without causing security problems. One such example is "mountlo" a loopback mount utility implemented with fuse and UML, which runs as an unprivileged userspace process. In this case the user does in fact have the right to create device nodes within the filesystem image, as long as the user has write access to the image. Since the filesystem is mounted with "nodev", adding device nodes is not a security concern. Signed-off-by: Miklos Szeredi <[EMAIL PROTECTED]> --- Index: linux/fs/namei.c === --- linux.orig/fs/namei.c 2007-09-24 13:52:17.0 +0200 +++ linux/fs/namei.c2007-09-24 13:54:57.0 +0200 @@ -1617,7 +1617,7 @@ int may_open(struct nameidata *nd, int a if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { flag &= ~O_TRUNC; } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { - if (nd->mnt->mnt_flags & MNT_NODEV) + if (IS_MNT_NODEV(nd->mnt)) return -EACCES; flag &= ~O_TRUNC; @@ -1920,7 +1920,8 @@ int vfs_mknod(struct inode *dir, struct if (error) return error; - if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) + if (!(dir->i_sb->s_flags & MS_MKNOD_NOCAP) && + (S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) return -EPERM; if (!dir->i_op || !dir->i_op->mknod) Index: linux/include/linux/fs.h === --- linux.orig/include/linux/fs.h 2007-09-24 13:52:17.0 +0200 +++ linux/include/linux/fs.h2007-09-24 13:54:57.0 +0200 @@ -130,6 +130,8 @@ extern int dir_notify_enable; #define MS_SETUSER (1<<23) /* set mnt_uid to current user */ #define MS_NOMNT (1<<24) /* don't allow unprivileged submounts */ #define MS_KERNMOUNT (1<<25) /* this is a kern_mount call */ +#define MS_MKNOD_NOCAP (1<<26) /* no 
capability check in mknod, + implies "nodev" */ #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) @@ -190,6 +192,10 @@ extern int dir_notify_enable; #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) +#define IS_MNT_NODEV(mnt) (((mnt)->mnt_flags & MNT_NODEV) || \ + ((mnt)->mnt_sb->s_flags & MS_MKNOD_NOCAP)) + + /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file. */ Index: linux/drivers/mtd/mtdsuper.c === --- linux.orig/drivers/mtd/mtdsuper.c 2007-09-24 13:52:17.0 +0200 +++ linux/drivers/mtd/mtdsuper.c2007-09-24 13:54:57.0 +0200 @@ -194,7 +194,7 @@ int get_sb_mtd(struct file_system_type * if (!S_ISBLK(nd.dentry->d_inode->i_mode)) goto out; - if (nd.mnt->mnt_flags & MNT_NODEV) { + if (IS_MNT_NODEV(nd.mnt)) { ret = -EACCES; goto out; } Index: linux/fs/block_dev.c === --- linux.orig/fs/block_dev.c 2007-09-24 13:52:17.0 +0200 +++ linux/fs/block_dev.c2007-09-24 13:54:57.0 +0200 @@ -1408,7 +1408,7 @@ struct block_device *lookup_bdev(const c if (!S_ISBLK(inode->i_mode)) goto fail; error = -EACCES; - if (nd.mnt->mnt_flags & MNT_NODEV) + if (IS_MNT_NODEV(nd.mnt)) goto fail; error = -ENOMEM; bdev = bd_acquire(inode); Index: linux/fs/namespace.c === --- linux.orig/fs/namespace.c 2007-09-24 13:52:17.0 +0200 +++ linux/fs/namespace.c2007-09-24 13:54:57.0 +0200 @@ -431,7 +431,6 @@ static int show_vfsmnt(struct seq_file * }; static struct proc_fs_info mnt_info[] = { { MNT_NOSUID, ",nosuid" }, - { MNT_NODEV, ",nodev" }, { MNT_NOEXEC, ",noexec" }, { MNT_NOATIME, ",noatime" }, { MNT_NODIRATIME, ",nodiratime" }, @@ -459,6 +458,8 @@ static int show_vfsmnt(struct seq_file * if (mnt->mnt_flags & fs_infop->flag) seq_puts(m, fs_infop->str); } + if (IS_MNT_NODEV(mnt)) + seq_puts(m, ",nodev"); if (mnt->mnt_flags & MNT_USER) seq_printf(m, ",user=%i",
[patch 1/2] VFS: new fgetattr() file operation
Thanks to everyone for the feedback. Here's two of the VFS patches reworked according to comments. I also plan to rework the setattr() patch accordingly and perhaps the xattr patch, although that is the lowest priority. Christoph, are these OK with you in this form? From: Miklos Szeredi <[EMAIL PROTECTED]> Add a new file operation: f_op->fgetattr(), that is invoked by fstat(). Fall back to i_op->getattr() if it is not defined. This is useful for filesystems such as sshfs, which don't have a state associated with inodes, but do have a state associated with open file handles, and can perform a getattr operation using these handles. In these cases there are basically three ways to correctly implement open-unlink-fstat semantics: 1) use "sillyrenaming" 2) keep track of open files for each inode within the filesystem, and randomly choose one for each getattr() request on an inode with i_nlink == 0 3) VFS passes the open file to the filesystem, which can be used to perform a getattr operation on the file handle No 3. is by far the simplest solution, but it does require this interface change in the VFS. It is also the only one that takes care of the case, when a regular file is unlinked or renamed on the remote host, while it is still open locally. 
Signed-off-by: Miklos Szeredi <[EMAIL PROTECTED]> --- Index: linux/fs/stat.c === --- linux.orig/fs/stat.c2007-09-24 13:04:37.0 +0200 +++ linux/fs/stat.c 2007-09-24 13:06:10.0 +0200 @@ -55,6 +55,27 @@ int vfs_getattr(struct vfsmount *mnt, st EXPORT_SYMBOL(vfs_getattr); +static int vfs_fgetattr(struct file *file, struct kstat *stat) +{ + struct vfsmount *mnt = file->f_path.mnt; + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; + int retval; + + retval = security_inode_getattr(mnt, dentry); + if (retval) + return retval; + + if (file->f_op && file->f_op->fgetattr) { + return file->f_op->fgetattr(file, stat); + } else if (inode->i_op->getattr) { + return inode->i_op->getattr(mnt, dentry, stat); + } else { + generic_fillattr(inode, stat); + return 0; + } +} + int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) { struct nameidata nd; @@ -101,7 +122,7 @@ int vfs_fstat(unsigned int fd, struct ks int error = -EBADF; if (f) { - error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); + error = vfs_fgetattr(f, stat); fput(f); } return error; Index: linux/include/linux/fs.h === --- linux.orig/include/linux/fs.h 2007-09-24 13:04:37.0 +0200 +++ linux/include/linux/fs.h2007-09-24 13:06:10.0 +0200 @@ -1193,6 +1193,7 @@ struct file_operations { ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); int (*setlease)(struct file *, long, struct file_lock **); int (*revoke)(struct file *, struct address_space *); + int (*fgetattr)(struct file *, struct kstat *); }; struct inode_operations { - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html