Re: [PATCH 6/11] eCryptfs: Update metadata read/write functions

2007-09-24 Thread Michael Halcrow
On Wed, Sep 19, 2007 at 10:48:17PM -0700, Andrew Morton wrote:
> On Mon, 17 Sep 2007 16:48:44 -0500 Michael Halcrow <[EMAIL PROTECTED]> wrote:
> > +   if ((rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode,
> 
> checkpatch missed the assignment-in-an-if here.

Fix an assignment-in-an-if.

Signed-off-by: Michael Halcrow <[EMAIL PROTECTED]>
---
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 4bf1a95..b3795f6 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1306,8 +1306,9 @@ ecryptfs_write_metadata_to_contents(struct 
ecryptfs_crypt_stat *crypt_stat,
int header_pages;
int rc;
 
-   if ((rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, page_virt,
-  0, PAGE_CACHE_SIZE))) {
+   rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, page_virt,
+ 0, PAGE_CACHE_SIZE);
+   if (rc) {
printk(KERN_ERR "%s: Error attempting to write header "
   "information to lower file; rc = [%d]\n", __FUNCTION__,
   rc);
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/11] eCryptfs: read_write.c routines

2007-09-24 Thread Michael Halcrow
On Wed, Sep 19, 2007 at 10:38:50PM -0700, Andrew Morton wrote:
> > +   offset = (page_for_lower->index << PAGE_CACHE_SHIFT) + offset_in_page;
> 
> bug.  You need to cast page.index to loff_t before shifting.
> 
> I'd fix it on the spot, but this would be a good time to review the
> whole patchset and perhaps the whole fs for this easy-to-do,
> hard-to-find bug.

Update data types and add casts in order to avoid potential overflow
issues.

Signed-off-by: Michael Halcrow <[EMAIL PROTECTED]>
---
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 5d27cf9..4bf1a95 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -149,7 +149,7 @@ out:
  * ecryptfs_derive_iv
  * @iv: destination for the derived iv vale
  * @crypt_stat: Pointer to crypt_stat struct for the current inode
- * @offset: Offset of the page whose's iv we are to derive
+ * @offset: Offset of the extent whose IV we are to derive
  *
  * Generate the initialization vector from the given root IV and page
  * offset.
@@ -157,7 +157,7 @@ out:
  * Returns zero on success; non-zero on error.
  */
 static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
- pgoff_t offset)
+ loff_t offset)
 {
int rc = 0;
char dst[MD5_DIGEST_SIZE];
@@ -173,7 +173,7 @@ static int ecryptfs_derive_iv(char *iv, struct 
ecryptfs_crypt_stat *crypt_stat,
 * hashing business. -Halcrow */
memcpy(src, crypt_stat->root_iv, crypt_stat->iv_bytes);
memset((src + crypt_stat->iv_bytes), 0, 16);
-   snprintf((src + crypt_stat->iv_bytes), 16, "%ld", offset);
+   snprintf((src + crypt_stat->iv_bytes), 16, "%lld", offset);
if (unlikely(ecryptfs_verbosity > 0)) {
ecryptfs_printk(KERN_DEBUG, "source:\n");
ecryptfs_dump_hex(src, (crypt_stat->iv_bytes + 16));
@@ -384,11 +384,11 @@ static int ecryptfs_encrypt_extent(struct page 
*enc_extent_page,
   struct page *page,
   unsigned long extent_offset)
 {
-   unsigned long extent_base;
+   loff_t extent_base;
char extent_iv[ECRYPTFS_MAX_IV_BYTES];
int rc;
 
-   extent_base = (page->index
+   extent_base = (((loff_t)page->index)
   * (PAGE_CACHE_SIZE / crypt_stat->extent_size));
rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
(extent_base + extent_offset));
@@ -492,8 +492,9 @@ int ecryptfs_encrypt_page(struct page *page)
goto out;
}
ecryptfs_lower_offset_for_extent(
-   &offset, ((page->index * (PAGE_CACHE_SIZE
- / crypt_stat->extent_size))
+   &offset, ((((loff_t)page->index)
+  * (PAGE_CACHE_SIZE
+ / crypt_stat->extent_size))
  + extent_offset), crypt_stat);
rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt,
  offset, crypt_stat->extent_size);
@@ -515,11 +516,11 @@ static int ecryptfs_decrypt_extent(struct page *page,
   struct page *enc_extent_page,
   unsigned long extent_offset)
 {
-   unsigned long extent_base;
+   loff_t extent_base;
char extent_iv[ECRYPTFS_MAX_IV_BYTES];
int rc;
 
-   extent_base = (page->index
+   extent_base = (((loff_t)page->index)
   * (PAGE_CACHE_SIZE / crypt_stat->extent_size));
rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
(extent_base + extent_offset));
@@ -1320,7 +1321,7 @@ ecryptfs_write_metadata_to_contents(struct 
ecryptfs_crypt_stat *crypt_stat,
while (current_header_page < header_pages) {
loff_t offset;
 
-   offset = (current_header_page << PAGE_CACHE_SHIFT);
+   offset = (((loff_t)current_header_page) << PAGE_CACHE_SHIFT);
if ((rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode,
   page_virt, offset,
   PAGE_CACHE_SIZE))) {
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index c6a8a33..4eb09c1 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -127,7 +127,8 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
int rc = 0;
 
while (extent_num_in_page < num_extents_per_page) {
-   loff_t view_extent_num = ((page->index * num_extents_per_page)
+   loff_t view_extent_num = ((((loff_t)page->index)
+  * num_extents_per_page)
  + extent_num_in_page);
 
if (view_extent_num < crypt_stat->num_header_extents_at_front) {
@@ -418,

Re: [00/41] Large Blocksize Support V7 (adds memmap support)

2007-09-24 Thread Christoph Lameter
On Fri, 21 Sep 2007, Hugh Dickins wrote:

> I've found some fixes needed on top of your Large Blocksize Support
> patches: I'll send those to you in a moment.  Looks like you didn't
> try much swapping!

yup. Thanks for looking at it.

> 
> I only managed to get ext2 working with larger blocksizes:
> reiserfs -b 8192 wouldn't mount ("reiserfs_fill_super: can not find
> reiserfs on /dev/sdb1"); ext3 gave me mysterious errors ("JBD: tar
> wants too many credits", even after adding JBD patches that you
> turned out to be depending on); and I didn't try ext4 or xfs
> (I'm guessing the latter has been quite well tested ;)

Yes, there were issues with the first releases of the JBD patches. The 
current crop in mm is fine but much of that may have bypassed this list.



-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: 2.6.22.6: kernel BUG at fs/locks.c:171

2007-09-24 Thread Soeren Sonnenburg

On Fri, 2007-09-14 at 07:22 +1000, Nick Piggin wrote:
> On Friday 14 September 2007 16:02, Soeren Sonnenburg wrote:
> > On Thu, 2007-09-13 at 09:51 +1000, Nick Piggin wrote:
> > > On Thursday 13 September 2007 19:20, Soeren Sonnenburg wrote:
> > > > Dear all,
> > > >
> > > > I've just seen this in dmesg on a AMD K7 / kernel 2.6.22.6 machine
> > > > (config attached).
> > > >
> > > > Any ideas / which further information needed ?
> > >
> > > Thanks for the report. Is it reproduceable? It seems like the
> > > locks_free_lock call that's oopsing is coming from __posix_lock_file.
> > > The actual function looks fine, but the lock being freed could have
> > > been corrupted if there was slab corruption, or a hardware corruption.
> > >
> > > You could: try running memtest86+ overnight. And try the following
> > > patch and turn on slab debugging then try to reproduce the problem.
> >
> > OK so far I've run memtest86+ 1.40 from freedos for 8 hrs (v1.70 hung on
> > startup) - nothing.
> 
> Thanks.
> 
> > Could this corruption be caused by a pci card/driver? I am asking as I
> > am using a new dvb-t card (asus p7131) and the oops happened after 5 or
> > 6 days of uptime just about a day after watching some movie (very bad
> > reception/lots of errors).
> 
> It could be caused by that, definitely. slab debugging plus my earlier
> patch may help to narrow it down. (or stress testing with / without the
> dvb card in action).

OK, it is the dvb card. I have 1 week of uptime now without any errors.
Only change is the dvb driver (saa7146) not loaded.

:(
Soeren
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [00/41] Large Blocksize Support V7 (adds memmap support)

2007-09-24 Thread Andrea Arcangeli
On Sun, Sep 23, 2007 at 08:56:39AM +0200, Goswin von Brederlow wrote:
> As a user I know it because I didn't put a kernel source into /tmp. A
> programm can't reasonably know that.

Various apps require you (admin/user) to tune the size of their
caches. Seems like you never tried to setup a database, oh well.

> Xen has its own memory pool and can quite agressively reclaim memory
> from dom0 when needed. I just ment to say that the number in

The whole point is if there's not enough ram of course... this is why
you should check.

> /proc/meminfo can change in a second so it is not much use knowing
> what it said last minute.

The numbers will change depending on what's running on your
system. It's up to you to know plus I normally keep vmstat monitored
in the background to see how the cache/free levels change over
time. Those numbers are worthless if they could be fragmented...

> I would kill any programm that does that to find out how much free ram
> the system has.

The admin should do that if he's unsure, not a program of course!
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 1/2] VFS: new fgetattr() file operation

2007-09-24 Thread Miklos Szeredi
> > But it's has various dawbacks, like rmdir doesn't work if there are
> > open files within an otherwise empty directory.
> > 
> > I'd happily accept suggestions on how to deal with this differenty.
> 
> NFS has that problem because it really has to sillyrename into the same
> directory. I don't see that ssh/sftp needs to do that. Instead it can
> sillyrename anywhere in the filesystem.

I don't think it can.  How can we find in a reliable way another
directory, which is writable by the user?

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 1/2] VFS: new fgetattr() file operation

2007-09-24 Thread Miklos Szeredi
> On Mon, Sep 24, 2007 at 03:18:10PM +0200, Miklos Szeredi wrote:
> > > Or not support such a broken protocol at all.
> > 
> > Wonder what people would say if we removed support for NFSv[23].
> > 
> > Just because a protocol does not support "perfect" UNIX semantics, it
> > doesn't mean it's broken.  By that standard almost all network
> > filesystem protocols are severely broken.
> 
> Well, they are broken by these and other standards.  At least nfs and
> cifs maintainers do the workarounds for this brokeness where they belong.

And my patch is not working around a problem, rather solving a problem
in a correct way.

Let me summarise it:

There are valid reasons we have fstat() in addition to stat/lstat.
For example we want to protect against races involving unlink/rename on
an open file.

Say I want to implement a network filesystem with a pure unprivileged
userspace server (this is basically what sshfs does).

I want my filesystem client implementation to keep all these
advantages of fstat().  So how can I do that?  There's a simple way:
implement this operation with fstat() on the server.  I get all the
advantages on the remote system automatically.

But for that the filesystem needs to have the open file that the
fstat() on the client was performed on.

It's that simple.  There's really no ugly hacks going on behind the
scenes.  It's just that we do want to delegate some properties of this
operation onto the server, and the simplest and best implementation is
to just let the filesystem have the information it needs.

Why is that such a big problem?

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 1/2] VFS: new fgetattr() file operation

2007-09-24 Thread Alan Cox
> But it's has various dawbacks, like rmdir doesn't work if there are
> open files within an otherwise empty directory.
> 
> I'd happily accept suggestions on how to deal with this differenty.

NFS has that problem because it really has to sillyrename into the same
directory. I don't see that ssh/sftp needs to do that. Instead it can
sillyrename anywhere in the filesystem.

Alan
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 1/2] VFS: new fgetattr() file operation

2007-09-24 Thread Christoph Hellwig
On Mon, Sep 24, 2007 at 03:18:10PM +0200, Miklos Szeredi wrote:
> > Or not support such a broken protocol at all.
> 
> Wonder what people would say if we removed support for NFSv[23].
> 
> Just because a protocol does not support "perfect" UNIX semantics, it
> doesn't mean it's broken.  By that standard almost all network
> filesystem protocols are severely broken.

Well, they are broken by these and other standards.  At least nfs and
cifs maintainers do the workarounds for this brokenness where they belong.
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 1/2] VFS: new fgetattr() file operation

2007-09-24 Thread Miklos Szeredi
> On Mon, Sep 24, 2007 at 03:06:06PM +0200, Miklos Szeredi wrote:
> > A file isn't deleted while there are still links or open files
> > refering to it.  So getting the attributes for a file with nlink==0 is
> > perfectly valid while the file is still open.
> 
> Is it?  Why not just pretend that the attributes are wiped when the file
> is deleted.

You mean "when finally unlinked"?  Delete happens when the file is
closed.

> Effectively, they are, since they can't affect anything.

Sure it can.  It may be open on the server as well.

> > If a network filesystem protocol can't handle operations (be it data
> > or metadata) on an unlinked file, we must do sillirenaming, so that
> > the file is not actually unlinked.
> 
> Or you could call getattr right before you unlink and cache the result
> in the client.

The file can still be modified after being unlinked.

And even if we did this caching thing and modify the attributes when
the file is modified, it would not deal with access on the remote end,
and would be much more complex than the other alternatives.

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 1/2] VFS: new fgetattr() file operation

2007-09-24 Thread Miklos Szeredi
> > If a network filesystem protocol can't handle operations (be it data
> > or metadata) on an unlinked file, we must do sillirenaming, so that
> > the file is not actually unlinked.
> 
> Or not support such a broken protocol at all.

Wonder what people would say if we removed support for NFSv[23].

Just because a protocol does not support "perfect" UNIX semantics, it
doesn't mean it's broken.  By that standard almost all network
filesystem protocols are severely broken.

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 1/2] VFS: new fgetattr() file operation

2007-09-24 Thread Matthew Wilcox
On Mon, Sep 24, 2007 at 03:06:06PM +0200, Miklos Szeredi wrote:
> A file isn't deleted while there are still links or open files
> refering to it.  So getting the attributes for a file with nlink==0 is
> perfectly valid while the file is still open.

Is it?  Why not just pretend that the attributes are wiped when the file
is deleted.  Effectively, they are, since they can't affect anything.

> If a network filesystem protocol can't handle operations (be it data
> or metadata) on an unlinked file, we must do sillirenaming, so that
> the file is not actually unlinked.

Or you could call getattr right before you unlink and cache the result
in the client.

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 1/2] VFS: new fgetattr() file operation

2007-09-24 Thread Christoph Hellwig
On Mon, Sep 24, 2007 at 03:06:06PM +0200, Miklos Szeredi wrote:
> If a network filesystem protocol can't handle operations (be it data
> or metadata) on an unlinked file, we must do sillirenaming, so that
> the file is not actually unlinked.

Or not support such a broken protocol at all.

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 1/2] VFS: new fgetattr() file operation

2007-09-24 Thread Miklos Szeredi
> > > and if that means adding silly rename support so be it.
> > 
> > That's what is done currently.
> > 
> > But it's has various dawbacks, like rmdir doesn't work if there are
> > open files within an otherwise empty directory.
> > 
> > I'd happily accept suggestions on how to deal with this differenty.
> 
> Only sillyrename files with nlink > 1?  I don't see how attributes can
> change anything for a deleted file.

I don't quite understand your suggestion.

A file isn't deleted while there are still links or open files
referring to it.  So getting the attributes for a file with nlink==0 is
perfectly valid while the file is still open.

If a network filesystem protocol can't handle operations (be it data
or metadata) on an unlinked file, we must do sillyrenaming, so that
the file is not actually unlinked.

Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 1/2] VFS: new fgetattr() file operation

2007-09-24 Thread Matthew Wilcox
On Mon, Sep 24, 2007 at 02:48:08PM +0200, Miklos Szeredi wrote:
> > and if that means adding silly rename support so be it.
> 
> That's what is done currently.
> 
> But it's has various dawbacks, like rmdir doesn't work if there are
> open files within an otherwise empty directory.
> 
> I'd happily accept suggestions on how to deal with this differenty.

Only sillyrename files with nlink > 1?  I don't see how attributes can
change anything for a deleted file.

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 1/2] VFS: new fgetattr() file operation

2007-09-24 Thread Miklos Szeredi
> On Mon, Sep 24, 2007 at 02:24:54PM +0200, Miklos Szeredi wrote:
> > Thanks to everyone for the feedback.  Here's two of the VFS patches
> > reworked according to comments.  I also plan to rework the setattr()
> > patch accordingly and perhaps the xattr patch, altough that is the
> > lowest priority.
> > 
> > Christoph, are these OK with you in this form?
> 
> Not at all.  Attribute operations like this have no business at all
> looking at the struct file.  Please fix your dreaded filesystem to
> implement proper unix semantics intead,

It's a fixed protocol, with servers installed on millions of servers.
The protocol is called SFTP and the server is part of the OpenSSH
package.  There's nothing I can change there.

And even if I could change the protocol, it's impossible to implement
full UNIX semantics with a userspace server.  Please think a bit about
it.

> and if that means adding silly rename support so be it.

That's what is done currently.

But it has various drawbacks, like rmdir doesn't work if there are
open files within an otherwise empty directory.

I'd happily accept suggestions on how to deal with this differently.

Thanks,
Miklos
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 2/2] VFS: allow filesystem to override mknod capability checks

2007-09-24 Thread Christoph Hellwig
On Mon, Sep 24, 2007 at 02:25:54PM +0200, Miklos Szeredi wrote:
> From: Miklos Szeredi <[EMAIL PROTECTED]>
> 
> Add a new super block flag, that results in the VFS not checking if
> the current process has enough privileges to do an mknod().
> 
> If this flag is set, all mounts for this super block will have the
> "nodev" flag implied.
> 
> This is needed on filesystems, where an unprivileged user may be able
> to create a device node, without causing security problems.
> 
> One such example is "mountlo" a loopback mount utility implemented
> with fuse and UML, which runs as an unprivileged userspace process.
> In this case the user does in fact have the right to create device
> nodes within the filesystem image, as long as the user has write
> access to the image.  Since the filesystem is mounted with "nodev",
> adding device nodes is not a security concern.

This one looks okay, but I'd prefer to not put it in until we actually
have proper non-privileged mounts.

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [patch 1/2] VFS: new fgetattr() file operation

2007-09-24 Thread Christoph Hellwig
On Mon, Sep 24, 2007 at 02:24:54PM +0200, Miklos Szeredi wrote:
> Thanks to everyone for the feedback.  Here's two of the VFS patches
> reworked according to comments.  I also plan to rework the setattr()
> patch accordingly and perhaps the xattr patch, altough that is the
> lowest priority.
> 
> Christoph, are these OK with you in this form?

Not at all.  Attribute operations like this have no business at all
looking at the struct file.  Please fix your dreaded filesystem to
implement proper unix semantics instead, and if that means adding
silly rename support so be it.

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [00/41] Large Blocksize Support V7 (adds memmap support)

2007-09-24 Thread Kyle Moffett

On Sep 23, 2007, at 02:22:12, Goswin von Brederlow wrote:

[EMAIL PROTECTED] (Mel Gorman) writes:

On (16/09/07 23:58), Goswin von Brederlow didst pronounce:
But when you already have say 10% of the ram in mixed groups then  
it is a sign the external fragmentation happens and some time  
should be spend on moving movable objects.


I'll play around with it on the side and see what sort of results  
I get.  I won't be pushing anything any time soon in relation to  
this though.  For now, I don't intend to fiddle more with grouping  
pages by mobility for something that may or may not be of benefit  
to a feature that hasn't been widely tested with what exists today.


I watched the videos you posted. A nice and quite clear improvement  
with and without your logic. Cudos.


When you play around with it may I suggest a change to the display  
of the memory information. I think it would be valuable to use a  
Hilbert Curve to arange the pages into pixels. Like this:


# #  0  3
# #
###  1  2

### ###  0 1 E F
  # #
### ###  3 2 D C
# #
# ### #  4 7 8 B
# # # #
### ###  5 6 9 A


Here's an excellent example of an 0-255 numbered hilbert curve used  
to enumerate the various top-level allocations of IPv4 space:

http://xkcd.com/195/

Cheers,
Kyle Moffett

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch 2/2] VFS: allow filesystem to override mknod capability checks

2007-09-24 Thread Miklos Szeredi
From: Miklos Szeredi <[EMAIL PROTECTED]>

Add a new super block flag, that results in the VFS not checking if
the current process has enough privileges to do an mknod().

If this flag is set, all mounts for this super block will have the
"nodev" flag implied.

This is needed on filesystems, where an unprivileged user may be able
to create a device node, without causing security problems.

One such example is "mountlo" a loopback mount utility implemented
with fuse and UML, which runs as an unprivileged userspace process.
In this case the user does in fact have the right to create device
nodes within the filesystem image, as long as the user has write
access to the image.  Since the filesystem is mounted with "nodev",
adding device nodes is not a security concern.

Signed-off-by: Miklos Szeredi <[EMAIL PROTECTED]>
---

Index: linux/fs/namei.c
===
--- linux.orig/fs/namei.c   2007-09-24 13:52:17.0 +0200
+++ linux/fs/namei.c2007-09-24 13:54:57.0 +0200
@@ -1617,7 +1617,7 @@ int may_open(struct nameidata *nd, int a
if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
flag &= ~O_TRUNC;
} else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
-   if (nd->mnt->mnt_flags & MNT_NODEV)
+   if (IS_MNT_NODEV(nd->mnt))
return -EACCES;
 
flag &= ~O_TRUNC;
@@ -1920,7 +1920,8 @@ int vfs_mknod(struct inode *dir, struct 
if (error)
return error;
 
-   if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
+   if (!(dir->i_sb->s_flags & MS_MKNOD_NOCAP) &&
+   (S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
return -EPERM;
 
if (!dir->i_op || !dir->i_op->mknod)
Index: linux/include/linux/fs.h
===
--- linux.orig/include/linux/fs.h   2007-09-24 13:52:17.0 +0200
+++ linux/include/linux/fs.h2007-09-24 13:54:57.0 +0200
@@ -130,6 +130,8 @@ extern int dir_notify_enable;
 #define MS_SETUSER (1<<23) /* set mnt_uid to current user */
 #define MS_NOMNT   (1<<24) /* don't allow unprivileged submounts */
 #define MS_KERNMOUNT   (1<<25) /* this is a kern_mount call */
+#define MS_MKNOD_NOCAP (1<<26) /* no capability check in mknod,
+  implies "nodev" */
 #define MS_ACTIVE  (1<<30)
 #define MS_NOUSER  (1<<31)
 
@@ -190,6 +192,10 @@ extern int dir_notify_enable;
 #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE)
 #define IS_PRIVATE(inode)  ((inode)->i_flags & S_PRIVATE)
 
+#define IS_MNT_NODEV(mnt)  (((mnt)->mnt_flags & MNT_NODEV) || \
+   ((mnt)->mnt_sb->s_flags & MS_MKNOD_NOCAP))
+
+
 /* the read-only stuff doesn't really belong here, but any other place is
probably as bad and I don't want to create yet another include file. */
 
Index: linux/drivers/mtd/mtdsuper.c
===
--- linux.orig/drivers/mtd/mtdsuper.c   2007-09-24 13:52:17.0 +0200
+++ linux/drivers/mtd/mtdsuper.c2007-09-24 13:54:57.0 +0200
@@ -194,7 +194,7 @@ int get_sb_mtd(struct file_system_type *
if (!S_ISBLK(nd.dentry->d_inode->i_mode))
goto out;
 
-   if (nd.mnt->mnt_flags & MNT_NODEV) {
+   if (IS_MNT_NODEV(nd.mnt)) {
ret = -EACCES;
goto out;
}
Index: linux/fs/block_dev.c
===
--- linux.orig/fs/block_dev.c   2007-09-24 13:52:17.0 +0200
+++ linux/fs/block_dev.c2007-09-24 13:54:57.0 +0200
@@ -1408,7 +1408,7 @@ struct block_device *lookup_bdev(const c
if (!S_ISBLK(inode->i_mode))
goto fail;
error = -EACCES;
-   if (nd.mnt->mnt_flags & MNT_NODEV)
+   if (IS_MNT_NODEV(nd.mnt))
goto fail;
error = -ENOMEM;
bdev = bd_acquire(inode);
Index: linux/fs/namespace.c
===
--- linux.orig/fs/namespace.c   2007-09-24 13:52:17.0 +0200
+++ linux/fs/namespace.c2007-09-24 13:54:57.0 +0200
@@ -431,7 +431,6 @@ static int show_vfsmnt(struct seq_file *
};
static struct proc_fs_info mnt_info[] = {
{ MNT_NOSUID, ",nosuid" },
-   { MNT_NODEV, ",nodev" },
{ MNT_NOEXEC, ",noexec" },
{ MNT_NOATIME, ",noatime" },
{ MNT_NODIRATIME, ",nodiratime" },
@@ -459,6 +458,8 @@ static int show_vfsmnt(struct seq_file *
if (mnt->mnt_flags & fs_infop->flag)
seq_puts(m, fs_infop->str);
}
+   if (IS_MNT_NODEV(mnt))
+   seq_puts(m, ",nodev");
if (mnt->mnt_flags & MNT_USER)
seq_printf(m, ",user=%i",

[patch 1/2] VFS: new fgetattr() file operation

2007-09-24 Thread Miklos Szeredi
Thanks to everyone for the feedback.  Here are two of the VFS patches
reworked according to comments.  I also plan to rework the setattr()
patch accordingly and perhaps the xattr patch, although that is the
lowest priority.

Christoph, are these OK with you in this form?


From: Miklos Szeredi <[EMAIL PROTECTED]>

Add a new file operation: f_op->fgetattr(), that is invoked by
fstat().  Fall back to i_op->getattr() if it is not defined.

This is useful for filesystems such as sshfs, which don't have a state
associated with inodes, but do have a state associated with open file
handles, and can perform a getattr operation using these handles.

In these cases there are basically three ways to correctly implement
open-unlink-fstat semantics:

 1) use "sillyrenaming"

 2) keep track of open files for each inode within the filesystem, and
randomly choose one for each getattr() request on an inode with
i_nlink == 0

 3) VFS passes the open file to the filesystem, which can be used to
perform a getattr operation on the file handle

Option 3 is by far the simplest solution, but it does require this
interface change in the VFS.  It is also the only one that takes care
of the case, when a regular file is unlinked or renamed on the remote
host, while it is still open locally.

Signed-off-by: Miklos Szeredi <[EMAIL PROTECTED]>
---

Index: linux/fs/stat.c
===
--- linux.orig/fs/stat.c2007-09-24 13:04:37.0 +0200
+++ linux/fs/stat.c 2007-09-24 13:06:10.0 +0200
@@ -55,6 +55,27 @@ int vfs_getattr(struct vfsmount *mnt, st
 
 EXPORT_SYMBOL(vfs_getattr);
 
+static int vfs_fgetattr(struct file *file, struct kstat *stat)
+{
+   struct vfsmount *mnt = file->f_path.mnt;
+   struct dentry *dentry = file->f_path.dentry;
+   struct inode *inode = dentry->d_inode;
+   int retval;
+
+   retval = security_inode_getattr(mnt, dentry);
+   if (retval)
+   return retval;
+
+   if (file->f_op && file->f_op->fgetattr) {
+   return file->f_op->fgetattr(file, stat);
+   } else if (inode->i_op->getattr) {
+   return inode->i_op->getattr(mnt, dentry, stat);
+   } else {
+   generic_fillattr(inode, stat);
+   return 0;
+   }
+}
+
 int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat)
 {
struct nameidata nd;
@@ -101,7 +122,7 @@ int vfs_fstat(unsigned int fd, struct ks
int error = -EBADF;
 
if (f) {
-   error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
+   error = vfs_fgetattr(f, stat);
fput(f);
}
return error;
Index: linux/include/linux/fs.h
===
--- linux.orig/include/linux/fs.h   2007-09-24 13:04:37.0 +0200
+++ linux/include/linux/fs.h2007-09-24 13:06:10.0 +0200
@@ -1193,6 +1193,7 @@ struct file_operations {
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info 
*, size_t, unsigned int);
int (*setlease)(struct file *, long, struct file_lock **);
int (*revoke)(struct file *, struct address_space *);
+   int (*fgetattr)(struct file *, struct kstat *);
 };
 
 struct inode_operations {
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html