Re: [patch 0/6][RFC] Cleanup FIBMAP

2007-10-27 Thread H. Peter Anvin

Mike Waychison wrote:

The following series is meant to clean up FIBMAP paths with the eventual goal 
of allowing users to be able to FIBMAP their data.


Keep in mind that FIBMAP is currently extremely expensive on some
filesystems, e.g. ext3.  Therefore, additional filesystem-level work
would have to be done in order for this not to become a DoS issue.


-hpa
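
For readers following along, the user-visible interface under discussion
is tiny; here is a minimal sketch of a classic FIBMAP caller (error
handling omitted, and note the CAP_SYS_RAWIO check visible in
file_ioctl() in the patch below):

#include <stdio.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FIBMAP, FIGETBSZ */

int main(int argc, char **argv)
{
	int fd = open(argv[1], O_RDONLY);
	int blocksize = 0;
	int block = 0;		/* in: logical block; out: physical block */

	ioctl(fd, FIGETBSZ, &blocksize);
	ioctl(fd, FIBMAP, &block);	/* needs CAP_SYS_RAWIO */
	printf("logical block 0 -> physical block %d (blocksize %d)\n",
	       block, blocksize);
	return 0;
}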



[PATCH 1/3] VFS: apply coding standards to fs/ioctl.c

2007-10-27 Thread Erez Zadok
Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
---
 fs/ioctl.c |  164 +++-
 1 files changed, 84 insertions(+), 80 deletions(-)

diff --git a/fs/ioctl.c b/fs/ioctl.c
index c2a773e..652cacf 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -12,8 +12,8 @@
 #include <linux/fs.h>
 #include <linux/security.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
 static long do_ioctl(struct file *filp, unsigned int cmd,
@@ -45,31 +45,31 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
 {
int error;
int block;
-   struct inode * inode = filp->f_path.dentry->d_inode;
+   struct inode *inode = filp->f_path.dentry->d_inode;
int __user *p = (int __user *)arg;
 
switch (cmd) {
-   case FIBMAP:
-   {
-   struct address_space *mapping = filp->f_mapping;
-   int res;
-   /* do we support this mess? */
-   if (!mapping->a_ops->bmap)
-   return -EINVAL;
-   if (!capable(CAP_SYS_RAWIO))
-   return -EPERM;
-   if ((error = get_user(block, p)) != 0)
-   return error;
-
-   lock_kernel();
-   res = mapping->a_ops->bmap(mapping, block);
-   unlock_kernel();
-   return put_user(res, p);
-   }
-   case FIGETBSZ:
-   return put_user(inode->i_sb->s_blocksize, p);
-   case FIONREAD:
-   return put_user(i_size_read(inode) - filp->f_pos, p);
+   case FIBMAP:
+   {
+   struct address_space *mapping = filp->f_mapping;
+   int res;
+   /* do we support this mess? */
+   if (!mapping->a_ops->bmap)
+   return -EINVAL;
+   if (!capable(CAP_SYS_RAWIO))
+   return -EPERM;
+   error = get_user(block, p);
+   if (error)
+   return error;
+   lock_kernel();
+   res = mapping->a_ops->bmap(mapping, block);
+   unlock_kernel();
+   return put_user(res, p);
+   }
+   case FIGETBSZ:
+   return put_user(inode->i_sb->s_blocksize, p);
+   case FIONREAD:
+   return put_user(i_size_read(inode) - filp->f_pos, p);
}
 
return do_ioctl(filp, cmd, arg);
@@ -82,81 +82,85 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
  * vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
  * It's just a simple helper for sys_ioctl and compat_sys_ioctl.
  */
-int vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg)
+int vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
+ unsigned long arg)
 {
unsigned int flag;
int on, error = 0;
 
switch (cmd) {
-   case FIOCLEX:
-   set_close_on_exec(fd, 1);
-   break;
+   case FIOCLEX:
+   set_close_on_exec(fd, 1);
+   break;
 
-   case FIONCLEX:
-   set_close_on_exec(fd, 0);
-   break;
+   case FIONCLEX:
+   set_close_on_exec(fd, 0);
+   break;
 
-   case FIONBIO:
-   if ((error = get_user(on, (int __user *)arg)) != 0)
-   break;
-   flag = O_NONBLOCK;
+   case FIONBIO:
+   error = get_user(on, (int __user *)arg);
+   if (error)
+   break;
+   flag = O_NONBLOCK;
 #ifdef __sparc__
-   /* SunOS compatibility item. */
-   if(O_NONBLOCK != O_NDELAY)
-   flag |= O_NDELAY;
+   /* SunOS compatibility item. */
+   if (O_NONBLOCK != O_NDELAY)
+   flag |= O_NDELAY;
 #endif
-   if (on)
-   filp->f_flags |= flag;
-   else
-   filp->f_flags &= ~flag;
+   if (on)
+   filp->f_flags |= flag;
+   else
+   filp->f_flags &= ~flag;
+   break;
+
+   case FIOASYNC:
+   error = get_user(on, (int __user *)arg);
+   if (error)
break;
-
-   case FIOASYNC:
-   if ((error = get_user(on, (int __user *)arg)) != 0)
-   break;
-   flag = on ? FASYNC : 0;
-
-   /* Did FASYNC state change ? */
-   if ((flag ^ filp->f_flags) & FASYNC) {
-   if (filp->f_op && filp->f_op->fasync) {
-  

[PATCH 3/3] Unionfs: use vfs_ioctl

2007-10-27 Thread Erez Zadok
Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
---
 fs/unionfs/commonfops.c |   32 ++--
 1 files changed, 6 insertions(+), 26 deletions(-)

diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c
index 50e5775..c99b519 100644
--- a/fs/unionfs/commonfops.c
+++ b/fs/unionfs/commonfops.c
@@ -661,31 +661,6 @@ out:
return err;
 }
 
-/* pass the ioctl to the lower fs */
-static long do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-   struct file *lower_file;
-   int err;
-
-   lower_file = unionfs_lower_file(file);
-
-   err = -ENOTTY;
-   if (!lower_file || !lower_file->f_op)
-   goto out;
-   if (lower_file->f_op->unlocked_ioctl) {
-   err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
-   } else if (lower_file->f_op->ioctl) {
-   lock_kernel();
-   err = lower_file->f_op->ioctl(
-   lower_file->f_path.dentry->d_inode,
-   lower_file, cmd, arg);
-   unlock_kernel();
-   }
-
-out:
-   return err;
-}
-
 /*
  * return to user-space the branch indices containing the file in question
  *
@@ -752,6 +727,7 @@ out:
 long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
long err;
+   struct file *lower_file;
 
unionfs_read_lock(file->f_path.dentry->d_sb);
 
@@ -775,7 +751,11 @@ long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
default:
/* pass the ioctl down */
-   err = do_ioctl(file, cmd, arg);
+   lower_file = unionfs_lower_file(file);
+   if (lower_file)
+   err = vfs_ioctl(lower_file, cmd, arg);
+   else
+   err = -ENOTTY;
break;
}
 
-- 
1.5.2.2



[PATCH] 0/3 fs/ioctl.c coding style, rename vfs_ioctl/do_ioctl

2007-10-27 Thread Erez Zadok

This series of three proposed patches changes fs/ioctl.c and Unionfs as
follows.  This series is against v2.6.24-rc1-192-gef49c32.

Patch 1: just applies coding standards to fs/ioctl.c (while I'm at it, I
figured it's worth cleaning VFS files one at a time).

Patch 2: does two things:

(a) Renames the old vfs_ioctl to do_ioctl, because the comment above the old
vfs_ioctl clearly indicates that it is an internal function not to be
exported to modules; therefore it should have a more traditional do_XXX
"internal function" name.  The new do_ioctl is exported in fs.h but not
to modules.

(b) Renames the old (static) do_ioctl to vfs_ioctl because the names vfs_XXX
should preferably be reserved to callable VFS functions which modules
may call, as other vfs_XXX functions already do.  Export the new
vfs_ioctl to modules so others can use it (including Unionfs and
eCryptfs).

Patch 3: demonstrates how Unionfs can use the new vfs_ioctl.  I successfully
tested unionfs with this new exported vfs_ioctl.  (eCryptfs could do the
same.)

I'd like to propose that the first two patches be merged in -mm and even
mainline, pending review.

Erez Zadok (3):
  VFS: apply coding standards to fs/ioctl.c
  VFS: swap do_ioctl and vfs_ioctl names
  Unionfs: use vfs_ioctl

 fs/compat_ioctl.c   |2 
 fs/ioctl.c  |  176 
 fs/unionfs/commonfops.c |   22 +-
 include/linux/fs.h  |3 

Cheers,
Erez.


[PATCH 2/3] VFS: swap do_ioctl and vfs_ioctl names

2007-10-27 Thread Erez Zadok
Rename old vfs_ioctl to do_ioctl, because the comment above it clearly
indicates that it is an internal function not to be exported to modules;
therefore it should have a more traditional do_XXX name.  The new do_ioctl
is exported in fs.h but not to modules.

Rename the old do_ioctl to vfs_ioctl because the names vfs_XXX should
preferably be reserved to callable VFS functions which modules may call,
as many other vfs_XXX functions already do.  Export the new vfs_ioctl to
modules so others can use it (including Unionfs and eCryptfs).

Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
---
 fs/compat_ioctl.c  |2 +-
 fs/ioctl.c |   18 ++
 include/linux/fs.h |3 ++-
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a4284cc..a1604ce 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2972,7 +2972,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
}
 
  do_ioctl:
-   error = vfs_ioctl(filp, fd, cmd, arg);
+   error = do_ioctl(filp, fd, cmd, arg);
  out_fput:
fput_light(filp, fput_needed);
  out:
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 652cacf..00abbbf 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -16,8 +16,9 @@
 
 #include <asm/ioctls.h>
 
-static long do_ioctl(struct file *filp, unsigned int cmd,
-   unsigned long arg)
+/* vfs_ioctl can be called by other file systems or modules */
+long vfs_ioctl(struct file *filp, unsigned int cmd,
+  unsigned long arg)
 {
int error = -ENOTTY;
 
@@ -39,6 +40,7 @@ static long do_ioctl(struct file *filp, unsigned int cmd,
  out:
return error;
 }
+EXPORT_SYMBOL(vfs_ioctl);
 
 static int file_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
@@ -72,18 +74,18 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
return put_user(i_size_read(inode) - filp->f_pos, p);
}
 
-   return do_ioctl(filp, cmd, arg);
+   return vfs_ioctl(filp, cmd, arg);
 }
 
 /*
  * When you add any new common ioctls to the switches above and below
  * please update compat_sys_ioctl() too.
  *
- * vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
+ * do_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
  * It's just a simple helper for sys_ioctl and compat_sys_ioctl.
  */
-int vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
- unsigned long arg)
+int do_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
+unsigned long arg)
 {
unsigned int flag;
int on, error = 0;
@@ -152,7 +154,7 @@ int vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
error = file_ioctl(filp, cmd, arg);
else
-   error = do_ioctl(filp, cmd, arg);
+   error = vfs_ioctl(filp, cmd, arg);
break;
}
return error;
@@ -172,7 +174,7 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
if (error)
goto out_fput;
 
-   error = vfs_ioctl(filp, fd, cmd, arg);
+   error = do_ioctl(filp, fd, cmd, arg);
  out_fput:
fput_light(filp, fput_needed);
  out:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b3ec4a4..c0c5d36 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1924,7 +1924,8 @@ extern int vfs_stat_fd(int dfd, char __user *, struct kstat *);
 extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *);
 extern int vfs_fstat(unsigned int, struct kstat *);
 
-extern int vfs_ioctl(struct file *, unsigned int, unsigned int, unsigned long);
+extern long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+extern int do_ioctl(struct file *, unsigned int, unsigned int, unsigned long);
 
 extern void get_filesystem(struct file_system_type *fs);
 extern void put_filesystem(struct file_system_type *fs);
-- 
1.5.2.2



Re: [PATCH 1/9] Unionfs: security convert lsm into a static interface fix

2007-10-27 Thread Erez Zadok
In message <[EMAIL PROTECTED]>, Christoph Hellwig writes:
> On Mon, Oct 22, 2007 at 08:48:04PM -0400, Erez Zadok wrote:
> > Why?  Are you concerned that the security policy may change after a module
> > is loaded?
> 
> No, it's a matter of proper layering.  We generally don't want modules
> like stackable filesystems to call directly into methods, but rather to
> use proper high-level VFS helpers that isolate them from details and
> possible changes.  The move to out-of-line security_ helpers just put
> this on the radar.

OK, I'll be shortly posting a couple of patches to fs/ioctl.c.

> > I can probably get rid of having unionfs call security_inode_permission,
> > by calling permission() myself and carefully post-process its return
> > code (unionfs needs to "ignore" EROFS initially, to allow copyup to take
> > place).
> 
> Sounds fine.

I was able to test this idea and it works fine.  Now unionfs calls
permission(), post-processes the return value, and I don't need my own
modified version of permission() in unionfs.  This saved me ~50 LoC and
reduced stack pressure a little.
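
A rough sketch of the post-processing idea (hypothetical fragment; the
helper name and the exact branch checks are invented here, and the real
unionfs code is more involved):

#include <linux/fs.h>
#include <linux/namei.h>

/* sketch: call the VFS helper, then post-process EROFS so that a
 * read-only lower branch leads to copyup instead of an error */
static int unionfs_check_perm(struct inode *lower_inode, int mask,
			      struct nameidata *nd)
{
	int err = permission(lower_inode, mask, nd);

	if (err == -EROFS && (mask & MAY_WRITE))
		err = 0;	/* copyup to a writable branch follows */
	return err;
}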

> > But security_file_ioctl doesn't have any existing helper I can call.  I
> > can introduce a trivial vfs_security_file_ioctl wrapper to
> > security_file_ioctl, but what about the already existing *19* exported
> > security_* functions in security/security.c?  Do you want to see simple
> > wrappers for all of them?  It seems redundant to add a one-line wrapper
> > around an already one-line function around security_ops->XXX.  Plus,
> > some of the existing exported security_* functions are file-system
> > related, others are networking, etc.  So we'll need wrappers whose names
> > are prefixed appropriately: vfs_*, net_*, etc.
> 
> The fix for security_file_ioctl is probably to either not do it at all
> or move it the call to security_file_ioctl into vfs_ioctl and get it by
> using that helper.  I suspect most other security_ exports should be
> avoided similarly.

Christoph, I looked more closely at that and the selinux code.  Only
sys_ioctl calls security_file_ioctl.  And security_file_ioctl performs all
sorts of checks that mostly have to do with the currently running task or
the open file.  The running task is still the same, whether filesystem
stacking is involved or not.  Also, the unionfs-level struct file is
logically the same file at the lower level: they refer to the same object,
just at two layers.  I can't see any reason why unionfs_ioctl should have to
call security_file_ioctl(lower_file) the way sys_ioctl does: that check is
already done well before the file system's ->ioctl is invoked.  I also don't
see how it would ever be possible that sys_ioctl will succeed in its call to
security_file_ioctl(upper_file), but unionfs will fail the same security
check on the lower file.

So I commented out unionfs's call to security_file_ioctl(lower_file) and
tested it on a bunch of things, including an selinux-enabled livecd.
Everything seemed to work just fine, so I'll be sending some patches to that
effect, and we can drop the -mm patch which exports security_file_ioctl().
BTW, ecryptfs doesn't call security_file_ioctl().

Cheers,
Erez.


Re: [patch 0/6][RFC] Cleanup FIBMAP

2007-10-27 Thread Szabolcs Szakacsits

On Sat, 27 Oct 2007, Anton Altaparmakov wrote:

> And another of my pet peeves with ->bmap is that it uses 0 to mean "sparse"
> which causes a conflict on NTFS at least as block zero is part of the $Boot
> system file so it is a real, valid block...  NTFS uses -1 to denote sparse
> blocks internally.

In practice, the meaning of 0 is file system [driver] dependent. For
example, in the case of NTFS-3G it means that the block is sparse, or
the file is encrypted, compressed, or resident, or it's the $Boot file,
or an error happened.

Thankfully the widely used FIBMAP callers (swapon and the ever less used
lilo) are only interested in non-zero values, and they report an error
if the driver returns 0 for some reason. Which is perfectly OK, since
both swapping and Linux booting would fail on a sparse, encrypted,
compressed, resident, or NTFS $Boot file.

In reality, though, both swap files and lilo work fine with NTFS if the
needed files were created the way these programs expect. If not, then
swapon or lilo will catch and report the file creation error.

AFAIR, somebody is working on (has done?) a much needed, better
alternative. Bmap is legacy; thank you Mike for maintaining it.

Szaka

--
NTFS-3G Lead Developer:  http://ntfs-3g.org


Re: Networked filesystems vs backing_dev_info

2007-10-27 Thread Peter Zijlstra
On Sat, 2007-10-27 at 23:30 +0200, Peter Zijlstra wrote:
> On Sat, 2007-10-27 at 16:02 -0500, Steve French wrote:
> > On 10/27/07, Peter Zijlstra <[EMAIL PROTECTED]> wrote:
> > > Hi,
> > >
> > > I had me a little look at bdi usage in networked filesystems.
> > >
> > >  NFS, CIFS, (smbfs), AFS, CODA and NCP
> > >
> > > And of those, NFS is the only one that I could find that creates
> > > backing_dev_info structures. The rest seems to fall back to
> > > default_backing_dev_info.
> > >
> > > With my recent per bdi dirty limit patches the bdi has become more
> > > important than it has been in the past. While falling back to the
> > > default_backing_dev_info isn't wrong per se, it isn't right either.
> > >
> > > Could I implore the various maintainers to look into this issue for
> > > their respective filesystem. I'll try and come up with some patches to
> > > address this, but feel free to beat me to it.
> > 
> > I would like to understand more about your patches to see what bdi
> > values make sense for CIFS and how to report possible congestion back
> > to the page manager. 
> 
> So, what my recent patches do is carve up the total writeback cache
> size, or dirty page limit as we call it, proportionally to a BDI's
> writeout speed. So a fast device gets more than a slow device, but will
> not starve it.
> 
> However, for this to work, each device, or remote backing store in the
> case of networked filesystems, needs to have a BDI.
> 
> >   I had been thinking about setting bdi->ra_pages
> > so that we do more sensible readahead and writebehind - better
> > matching what is possible over the network and what the server
> > prefers.  
> 
> Well, you'd first have to create backing_dev_info instances before
> setting that value :-)
> 
> >   SMB/CIFS Servers typically allow a maximum of 50 requests
> > in parallel at one time from one client (although this is adjustable
> > for some).
> 
> That seems like a perfect point to set congestion.
> 
> So in short, stick a struct backing_dev_info into whatever represents a
> client, initialize it using bdi_init(), destroy using bdi_destroy().

Oh, and the most important point, make your fresh I_NEW inodes point to
this bdi struct.

> Mark it congested once you have 50 (or more) outstanding requests, clear
> congestion when you drop below 50.
> 
> and you should be set.
> 



Re: Networked filesystems vs backing_dev_info

2007-10-27 Thread Peter Zijlstra
On Sat, 2007-10-27 at 16:02 -0500, Steve French wrote:
> On 10/27/07, Peter Zijlstra <[EMAIL PROTECTED]> wrote:
> > Hi,
> >
> > I had me a little look at bdi usage in networked filesystems.
> >
> >  NFS, CIFS, (smbfs), AFS, CODA and NCP
> >
> > And of those, NFS is the only one that I could find that creates
> > backing_dev_info structures. The rest seems to fall back to
> > default_backing_dev_info.
> >
> > With my recent per bdi dirty limit patches the bdi has become more
> > important than it has been in the past. While falling back to the
> default_backing_dev_info isn't wrong per se, it isn't right either.
> >
> > Could I implore the various maintainers to look into this issue for
> > their respective filesystem. I'll try and come up with some patches to
> > address this, but feel free to beat me to it.
> 
> I would like to understand more about your patches to see what bdi
> values make sense for CIFS and how to report possible congestion back
> to the page manager. 

So, what my recent patches do is carve up the total writeback cache
size, or dirty page limit as we call it, proportionally to a BDI's
writeout speed. So a fast device gets more than a slow device, but will
not starve it.
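
As a rough illustration of the carve-up (not the exact in-kernel
arithmetic), each BDI ends up with approximately

	dirty_limit(bdi) = total_dirty_limit * writeout(bdi) / sum_j writeout(j)

where writeout is a recency-weighted count of pages written back through
that BDI, so the shares adapt as the load shifts.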

However, for this to work, each device, or remote backing store in the
case of networked filesystems, needs to have a BDI.

>   I had been thinking about setting bdi->ra_pages
> so that we do more sensible readahead and writebehind - better
> matching what is possible over the network and what the server
> prefers.  

Well, you'd first have to create backing_dev_info instances before
setting that value :-)

>   SMB/CIFS Servers typically allow a maximum of 50 requests
> in parallel at one time from one client (although this is adjustable
> for some).

That seems like a perfect point to set congestion.

So in short, stick a struct backing_dev_info into whatever represents a
client, initialize it using bdi_init(), destroy using bdi_destroy().

Mark it congested once you have 50 (or more) outstanding requests, clear
congestion when you drop below 50.

and you should be set.
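
Pulling the recipe together, a minimal sketch of what this could look
like in such a client (hypothetical names throughout; it assumes the
bdi_init()/bdi_destroy() and set_bdi_congested()/clear_bdi_congested()
interfaces and the 50-request server limit discussed above):

#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <asm/atomic.h>

struct client_server_info {		/* whatever represents a client */
	struct backing_dev_info bdi;
	atomic_t in_flight;		/* outstanding requests */
};

static int client_bdi_setup(struct client_server_info *server)
{
	int err = bdi_init(&server->bdi);

	if (err)
		return err;
	/* optionally tune readahead, e.g. to a few 56K (14-page) requests */
	server->bdi.ra_pages = 4 * 14;
	atomic_set(&server->in_flight, 0);
	return 0;
}

/* make fresh I_NEW inodes point at this bdi */
static void client_set_inode_bdi(struct inode *inode,
				 struct client_server_info *server)
{
	inode->i_mapping->backing_dev_info = &server->bdi;
}

static void client_request_start(struct client_server_info *server)
{
	if (atomic_inc_return(&server->in_flight) >= 50) {
		set_bdi_congested(&server->bdi, READ);
		set_bdi_congested(&server->bdi, WRITE);
	}
}

static void client_request_done(struct client_server_info *server)
{
	if (atomic_dec_return(&server->in_flight) < 50) {
		clear_bdi_congested(&server->bdi, READ);
		clear_bdi_congested(&server->bdi, WRITE);
	}
}

static void client_bdi_teardown(struct client_server_info *server)
{
	bdi_destroy(&server->bdi);
}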





Re: Networked filesystems vs backing_dev_info

2007-10-27 Thread Steve French
On 10/27/07, Peter Zijlstra <[EMAIL PROTECTED]> wrote:
> Hi,
>
> I had me a little look at bdi usage in networked filesystems.
>
>  NFS, CIFS, (smbfs), AFS, CODA and NCP
>
> And of those, NFS is the only one that I could find that creates
> backing_dev_info structures. The rest seems to fall back to
> default_backing_dev_info.
>
> With my recent per bdi dirty limit patches the bdi has become more
> important than it has been in the past. While falling back to the
> default_backing_dev_info isn't wrong per-se, it isn't right either.
>
> Could I implore the various maintainers to look into this issue for
> their respective filesystem. I'll try and come up with some patches to
> address this, but feel free to beat me to it.

I would like to understand more about your patches to see what bdi
values make sense for CIFS and how to report possible congestion back
to the page manager.  I had been thinking about setting bdi->ra_pages
so that we do more sensible readahead and writebehind - better
matching what is possible over the network and what the server
prefers.SMB/CIFS Servers typically allow a maximum of 50 requests
in parallel at one time from one client (although this is adjustable
for some). The CIFS client prefers to do writes 14 pages (an iovec of
56K) at a time (although many servers can efficiently handle multiple
of these 56K writes in parallel).  With minor changes CIFS could
handle even larger writes (to just under 64K for Windows and just
under 128K for Samba - the current CIFS Unix Extensions allow servers
to negotiate much larger writes, but lacking a "receivepage"
equivalent Samba does not currently support larger than 128K).
Ideally, to improve large file copy utilization, I would like to see
from 3-10 writes of 56K (or larger in the future) in parallel.   The
read path is harder since we only do 16K reads to Windows and Samba -
but we need to increase the number of these that are done in parallel
on the same inode.  There is a large Google Summer of Code patch for
this which needs more review.


-- 
Thanks,

Steve


Re: msync(2) bug(?), returns AOP_WRITEPAGE_ACTIVATE to userland

2007-10-27 Thread Erez Zadok
In message <[EMAIL PROTECTED]>, Hugh Dickins writes:
> On Mon, 22 Oct 2007, Erez Zadok wrote:
[...]
> > If you've got suggestions how I can handle unionfs_write more cleanly, or
> > comments on the above possibilities, I'd love to hear them.
> 
> For now I think you should pursue the ~(__GFP_FS|__GFP_IO) idea somehow.
> 
> Hugh

Hugh, thanks for the great explanations and suggestions (in multiple
emails).  I'm going to test all of those soon.

Erez.


Re: [patch 0/6][RFC] Cleanup FIBMAP

2007-10-27 Thread Anton Altaparmakov

Hi,

->bmap is ugly and horrible!  If you have to do this, then at the very
least please make ->bmap64 able to return error values, for the cases
where the file system failed to get the information or where such
information simply does not exist, as is the case for compressed and
encrypted files, for example, and for small files stored inside the
on-disk inode (NTFS resident files and reiserfs packed tails are
examples of this).

And another of my pet peeves with ->bmap is that it uses 0 to mean
"sparse", which causes a conflict on NTFS at least, as block zero is
part of the $Boot system file and thus a real, valid block...  NTFS
uses -1 to denote sparse blocks internally.
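
One possible shape for such an interface (purely illustrative; no
->bmap64 exists in the tree, and the names below are invented to show
explicit holes plus negative errno returns):

#include <linux/types.h>
#include <linux/fs.h>

/* hypothetical sketch of an error-capable bmap variant */
typedef int (*bmap64_t)(struct address_space *mapping,
			u64 logical, u64 *physical, int *hole);
/*
 * Returns 0 on success, with either *physical set to a real block
 * number or *hole set to 1 for a sparse block (so 0 and -1 both stay
 * valid block numbers), or a negative errno when no block mapping
 * exists or can be obtained (compressed, encrypted, or resident
 * files, or an I/O error while reading the mapping).
 */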


Best regards,

Anton

On 27 Oct 2007, at 00:37, Mike Waychison wrote:

The following series is meant to clean up FIBMAP paths with the  
eventual goal of allowing users to be able to FIBMAP their data.


I'm sending this as an RFC as I've only tested this on an x86_64
kernel with a 32-bit binary on ext2, and I've noticed a couple of
ext2_warnings already.


I'm unsure of the locking in [4/6] fix_race_with_truncate.patch.
Any help here would be greatly appreciated.


The last patch, [6/6] drop_cap_sys_rawio_for_fibmap.patch, is of  
course, not to be applied until any remaining issues are fixed :)


Thanks,

Mike Waychison



--
Anton Altaparmakov  (replace at with @)
Unix Support, Computing Service, University of Cambridge, CB2 3QH, UK
Linux NTFS maintainer, http://www.linux-ntfs.org/



Re: Networked filesystems vs backing_dev_info

2007-10-27 Thread Jan Harkes
On Sat, Oct 27, 2007 at 11:34:26AM +0200, Peter Zijlstra wrote:
> I had me a little look at bdi usage in networked filesystems.
> 
>  NFS, CIFS, (smbfs), AFS, CODA and NCP
> 
> And of those, NFS is the only one that I could find that creates
> backing_dev_info structures. The rest seems to fall back to
> default_backing_dev_info.

While a file is opened in Coda we associate the open file handle with a
local cache file. All read and write operations are redirected to this
local file and we even redirect inode->i_mapping. Actual reads and
writes are completely handled by the underlying file system. We send the
new file contents back to the servers only after all local references
have been released (last-close semantics).

As a result, there is no need for backing_dev_info structures in Coda,
if any congestion control is needed it will be handled by the underlying
file system where our locally cached copies are stored.
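
In sketch form, the redirect amounts to something like this
(illustrative only; names simplified relative to the actual Coda code):

#include <linux/fs.h>

/* on open: send paging I/O for the Coda inode to the local
 * container file in the underlying file system */
static void redirect_mapping(struct inode *coda_inode,
			     struct file *cache_file)
{
	struct inode *host_inode = cache_file->f_path.dentry->d_inode;

	coda_inode->i_mapping = host_inode->i_mapping;
}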

Jan


Re: Networked filesystems vs backing_dev_info

2007-10-27 Thread Peter Zijlstra

On Sat, 2007-10-27 at 11:22 -0400, Jan Harkes wrote:
> On Sat, Oct 27, 2007 at 11:34:26AM +0200, Peter Zijlstra wrote:
> > I had me a little look at bdi usage in networked filesystems.
> > 
> >  NFS, CIFS, (smbfs), AFS, CODA and NCP
> > 
> > And of those, NFS is the only one that I could find that creates
> > backing_dev_info structures. The rest seems to fall back to
> > default_backing_dev_info.
> 
> While a file is opened in Coda we associate the open file handle with a
> local cache file. All read and write operations are redirected to this
> local file and we even redirect inode->i_mapping. Actual reads and
> writes are completely handled by the underlying file system. We send the
> new file contents back to the servers only after all local references
> have been released (last-close semantics).
> 
> As a result, there is no need for backing_dev_info structures in Coda,
> if any congestion control is needed it will be handled by the underlying
> file system where our locally cached copies are stored.

Ok, that works. Thanks for this explanation!



Re: [0/3] Distributed storage. Mirror algo extension for automatic recovery.

2007-10-27 Thread Evgeniy Polyakov
Hi Andrew.

On Fri, Oct 26, 2007 at 01:55:38PM -0700, Andrew Morton ([EMAIL PROTECTED]) 
wrote:
> > I'm pleased to announce the sixth release of the distributed storage
> > subsystem, which allows one to form a storage on top of remote and
> > local nodes, which in turn can be exported to another storage as a
> > node to form tree-like storages.
> 
> I went back and re-read last month's discussion and I'm not seeing any
> reason why we shouldn't start thinking about merging this.
> 
> How close is it to that stage?  A peek at your development blog indicates
> that things are still changing at a moderate rate?

I have completed the storage layer development itself; the only
remaining todo item is to implement a new redundancy algorithm, but I
have not seen major demand for that, so it will stay at low priority
for now.

I will use DST as a transport layer for a distributed filesystem, and
that will probably require additional features. I have no clean design
so far, but right now I have nothing in the pipe to commit to DST.


-- 
Evgeniy Polyakov


Networked filesystems vs backing_dev_info

2007-10-27 Thread Peter Zijlstra
Hi,

I had me a little look at bdi usage in networked filesystems.

 NFS, CIFS, (smbfs), AFS, CODA and NCP

And of those, NFS is the only one that I could find that creates
backing_dev_info structures. The rest seems to fall back to
default_backing_dev_info.

With my recent per bdi dirty limit patches the bdi has become more
important than it has been in the past. While falling back to the
default_backing_dev_info isn't wrong per se, it isn't right either.

Could I implore the various maintainers to look into this issue for
their respective filesystem. I'll try and come up with some patches to
address this, but feel free to beat me to it.

peterz
