Re: [PATCH 1/3] Unionfs: use printk KERN_CONT for debugging messages

2008-01-02 Thread Joe Perches
On Thu, 2008-01-03 at 00:57 -0500, Erez Zadok wrote:
> diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c
> index c2b8b58..5f1d887 100644
> --- a/fs/unionfs/debug.c
> +++ b/fs/unionfs/debug.c
>  void __show_inode_times(const struct inode *inode,
> @@ -472,15 +473,15 @@ void __show_inode_times(const struct inode *inode,
>   if (unlikely(!lower_inode))
>   continue;
>   pr_debug("IT(%lu:%d): ", inode->i_ino, bindex);
> - pr_debug("%s:%s:%d ", file, fxn, line);
> - pr_debug("um=%lu/%lu lm=%lu/%lu ",
> -  inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
> -  lower_inode->i_mtime.tv_sec,
> -  lower_inode->i_mtime.tv_nsec);
> - pr_debug("uc=%lu/%lu lc=%lu/%lu\n",
> -  inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
> -  lower_inode->i_ctime.tv_sec,
> -  lower_inode->i_ctime.tv_nsec);
> + printk(KERN_CONT "%s:%s:%d ", file, fxn, line);
> + printk(KERN_CONT "um=%lu/%lu lm=%lu/%lu ",
> +inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
> +lower_inode->i_mtime.tv_sec,
> +lower_inode->i_mtime.tv_nsec);
> + printk(KERN_CONT "uc=%lu/%lu lc=%lu/%lu\n",
> +inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
> +lower_inode->i_ctime.tv_sec,
> +lower_inode->i_ctime.tv_nsec);
>   }
>  }
>  

I think printks should be single statements and
KERN_CONT should be used as sparingly as possible.

Perhaps:
pr_debug("IT(%lu:%d): %s:%s:%d "
 "um=%lu/%lu lm=%lu/%lu "
 "uc=%lu/%lu lc=%lu/%lu\n",
 inode->i_ino, bindex, file, fnx, line,
 inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
 lower_inode->i_mtime.tv_sec,
 lower_inode->i_mtime.tv_nsec
 inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
 lower_inode->i_ctime.tv_sec,
 lower_inode->i_ctime.tv_nsec);

> @@ -497,15 +498,15 @@ void __show_dinode_times(const struct dentry *dentry,
>   continue;
>   pr_debug("DT(%s:%lu:%d): ", dentry->d_name.name, inode->i_ino,
>bindex);
> - pr_debug("%s:%s:%d ", file, fxn, line);
> - pr_debug("um=%lu/%lu lm=%lu/%lu ",
> -  inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
> -  lower_inode->i_mtime.tv_sec,
> -  lower_inode->i_mtime.tv_nsec);
> - pr_debug("uc=%lu/%lu lc=%lu/%lu\n",
> -  inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
> -  lower_inode->i_ctime.tv_sec,
> -  lower_inode->i_ctime.tv_nsec);
> + printk(KERN_CONT "%s:%s:%d ", file, fxn, line);
> + printk(KERN_CONT "um=%lu/%lu lm=%lu/%lu ",
> +inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
> +lower_inode->i_mtime.tv_sec,
> +lower_inode->i_mtime.tv_nsec);
> + printk(KERN_CONT "uc=%lu/%lu lc=%lu/%lu\n",
> +inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
> +lower_inode->i_ctime.tv_sec,
> +lower_inode->i_ctime.tv_nsec);
>   }
>  }
>  

and
pr_debug("DT(%s:%lu:%d): %s:%s:%d "
 "um=%lu/%lu lm=%lu/%lu "
 "uc=%lu/%lu lc=%lu/%lu\n",
 dentry->d_name.name, inode->i_ino, bindex,
 file, fnx, line,
 inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
 lower_inode->i_mtime.tv_sec,
 lower_inode->i_mtime.tv_nsec
 inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
 lower_inode->i_ctime.tv_sec,
 lower_inode->i_ctime.tv_nsec);

> 
> @@ -524,9 +525,10 @@ void __show_inode_counts(const struct inode *inode,
>   lower_inode = unionfs_lower_inode_idx(inode, bindex);
>   if (unlikely(!lower_inode))
>   continue;
> - pr_debug("SIC(%lu:%d:%d): ", inode->i_ino, bindex,
> -  atomic_read(&(inode)->i_count));
> - pr_debug("lc=%d ", atomic_read(&(lower_inode)->i_count));
> - pr_debug("%s:%s:%d\n", file, fxn, line);
> + printk(KERN_CONT "SIC(%lu:%d:%d): ", inode->i_ino, bindex,
> +atomic_read(&(inode)->i_count));
> + printk(KERN_CONT "lc=%d ",
> +atomic_read(&(lower_inode)->i_count));
> + printk(KERN_CONT "%s:%s:%d\n", file, fxn, line);
>   }
>  }

and
pr_debug("SIC(%l

[PATCH 3/3] Unionfs: use VFS helpers to manipulate i_nlink

2008-01-02 Thread Erez Zadok
Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
---
 fs/unionfs/unlink.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/unionfs/unlink.c b/fs/unionfs/unlink.c
index a1c82b6..1e370a1 100644
--- a/fs/unionfs/unlink.c
+++ b/fs/unionfs/unlink.c
@@ -79,7 +79,7 @@ static int unionfs_unlink_whiteout(struct inode *dir, struct 
dentry *dentry)
 
 out:
if (!err)
-   dentry->d_inode->i_nlink--;
+   inode_dec_link_count(dentry->d_inode);
 
/* We don't want to leave negative leftover dentries for revalidate. */
if (!err && (dbopaque(dentry) != -1))
-- 
1.5.2.2

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] Unionfs: locking fixes

2008-01-02 Thread Erez Zadok
Lock parent dentries during revalidation.
Reduce total number of lockdep classes used.

Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
---
 fs/unionfs/dentry.c |   13 -
 fs/unionfs/fanout.h |3 ++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c
index 0369d93..7646828 100644
--- a/fs/unionfs/dentry.c
+++ b/fs/unionfs/dentry.c
@@ -42,6 +42,7 @@ static bool __unionfs_d_revalidate_one(struct dentry *dentry,
memset(&lowernd, 0, sizeof(struct nameidata));
 
verify_locked(dentry);
+   verify_locked(dentry->d_parent);
 
/* if the dentry is unhashed, do NOT revalidate */
if (d_deleted(dentry))
@@ -351,7 +352,10 @@ bool __unionfs_d_revalidate_chain(struct dentry *dentry, 
struct nameidata *nd,
 * to child order.
 */
for (i = 0; i < chain_len; i++) {
-   unionfs_lock_dentry(chain[i], UNIONFS_DMUTEX_REVAL+i);
+   unionfs_lock_dentry(chain[i], UNIONFS_DMUTEX_REVAL_CHILD);
+   if (chain[i] != chain[i]->d_parent)
+   unionfs_lock_dentry(chain[i]->d_parent,
+   UNIONFS_DMUTEX_REVAL_PARENT);
saved_bstart = dbstart(chain[i]);
saved_bend = dbend(chain[i]);
sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
@@ -366,6 +370,8 @@ bool __unionfs_d_revalidate_chain(struct dentry *dentry, 
struct nameidata *nd,
 bindex++)
unionfs_mntput(chain[i], bindex);
}
+   if (chain[i] != chain[i]->d_parent)
+   unionfs_unlock_dentry(chain[i]->d_parent);
unionfs_unlock_dentry(chain[i]);
 
if (unlikely(!valid))
@@ -376,6 +382,9 @@ bool __unionfs_d_revalidate_chain(struct dentry *dentry, 
struct nameidata *nd,
 out_this:
/* finally, lock this dentry and revalidate it */
verify_locked(dentry);
+   if (dentry != dentry->d_parent)
+   unionfs_lock_dentry(dentry->d_parent,
+   UNIONFS_DMUTEX_REVAL_PARENT);
dgen = atomic_read(&UNIONFS_D(dentry)->generation);
 
if (unlikely(is_newer_lower(dentry))) {
@@ -394,6 +403,8 @@ out_this:
purge_inode_data(dentry->d_inode);
}
valid = __unionfs_d_revalidate_one(dentry, nd);
+   if (dentry != dentry->d_parent)
+   unionfs_unlock_dentry(dentry->d_parent);
 
/*
 * If __unionfs_d_revalidate_one() succeeded above, then it will
diff --git a/fs/unionfs/fanout.h b/fs/unionfs/fanout.h
index 5f31015..4d9a45f 100644
--- a/fs/unionfs/fanout.h
+++ b/fs/unionfs/fanout.h
@@ -290,7 +290,8 @@ enum unionfs_dentry_lock_class {
UNIONFS_DMUTEX_PARENT,
UNIONFS_DMUTEX_CHILD,
UNIONFS_DMUTEX_WHITEOUT,
-   UNIONFS_DMUTEX_REVAL,   /* for file/dentry revalidate */
+   UNIONFS_DMUTEX_REVAL_PARENT, /* for file/dentry revalidate */
+   UNIONFS_DMUTEX_REVAL_CHILD,   /* for file/dentry revalidate */
 };
 
 static inline void unionfs_lock_dentry(struct dentry *d,
-- 
1.5.2.2

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] Unionfs: use printk KERN_CONT for debugging messages

2008-01-02 Thread Erez Zadok
Signed-off-by: Erez Zadok <[EMAIL PROTECTED]>
---
 fs/unionfs/debug.c |   50 ++
 1 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c
index c2b8b58..5f1d887 100644
--- a/fs/unionfs/debug.c
+++ b/fs/unionfs/debug.c
@@ -456,9 +456,10 @@ void __show_branch_counts(const struct super_block *sb,
mnt = UNIONFS_D(sb->s_root)->lower_paths[i].mnt;
else
mnt = NULL;
-   pr_debug("%d:", (mnt ? atomic_read(&mnt->mnt_count) : -99));
+   printk(KERN_CONT "%d:",
+  (mnt ? atomic_read(&mnt->mnt_count) : -99));
}
-   pr_debug("%s:%s:%d\n", file, fxn, line);
+   printk(KERN_CONT "%s:%s:%d\n", file, fxn, line);
 }
 
 void __show_inode_times(const struct inode *inode,
@@ -472,15 +473,15 @@ void __show_inode_times(const struct inode *inode,
if (unlikely(!lower_inode))
continue;
pr_debug("IT(%lu:%d): ", inode->i_ino, bindex);
-   pr_debug("%s:%s:%d ", file, fxn, line);
-   pr_debug("um=%lu/%lu lm=%lu/%lu ",
-inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
-lower_inode->i_mtime.tv_sec,
-lower_inode->i_mtime.tv_nsec);
-   pr_debug("uc=%lu/%lu lc=%lu/%lu\n",
-inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
-lower_inode->i_ctime.tv_sec,
-lower_inode->i_ctime.tv_nsec);
+   printk(KERN_CONT "%s:%s:%d ", file, fxn, line);
+   printk(KERN_CONT "um=%lu/%lu lm=%lu/%lu ",
+  inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
+  lower_inode->i_mtime.tv_sec,
+  lower_inode->i_mtime.tv_nsec);
+   printk(KERN_CONT "uc=%lu/%lu lc=%lu/%lu\n",
+  inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+  lower_inode->i_ctime.tv_sec,
+  lower_inode->i_ctime.tv_nsec);
}
 }
 
@@ -497,15 +498,15 @@ void __show_dinode_times(const struct dentry *dentry,
continue;
pr_debug("DT(%s:%lu:%d): ", dentry->d_name.name, inode->i_ino,
 bindex);
-   pr_debug("%s:%s:%d ", file, fxn, line);
-   pr_debug("um=%lu/%lu lm=%lu/%lu ",
-inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
-lower_inode->i_mtime.tv_sec,
-lower_inode->i_mtime.tv_nsec);
-   pr_debug("uc=%lu/%lu lc=%lu/%lu\n",
-inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
-lower_inode->i_ctime.tv_sec,
-lower_inode->i_ctime.tv_nsec);
+   printk(KERN_CONT "%s:%s:%d ", file, fxn, line);
+   printk(KERN_CONT "um=%lu/%lu lm=%lu/%lu ",
+  inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
+  lower_inode->i_mtime.tv_sec,
+  lower_inode->i_mtime.tv_nsec);
+   printk(KERN_CONT "uc=%lu/%lu lc=%lu/%lu\n",
+  inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+  lower_inode->i_ctime.tv_sec,
+  lower_inode->i_ctime.tv_nsec);
}
 }
 
@@ -524,9 +525,10 @@ void __show_inode_counts(const struct inode *inode,
lower_inode = unionfs_lower_inode_idx(inode, bindex);
if (unlikely(!lower_inode))
continue;
-   pr_debug("SIC(%lu:%d:%d): ", inode->i_ino, bindex,
-atomic_read(&(inode)->i_count));
-   pr_debug("lc=%d ", atomic_read(&(lower_inode)->i_count));
-   pr_debug("%s:%s:%d\n", file, fxn, line);
+   printk(KERN_CONT "SIC(%lu:%d:%d): ", inode->i_ino, bindex,
+  atomic_read(&(inode)->i_count));
+   printk(KERN_CONT "lc=%d ",
+  atomic_read(&(lower_inode)->i_count));
+   printk(KERN_CONT "%s:%s:%d\n", file, fxn, line);
}
 }
-- 
1.5.2.2

-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[GIT PULL -mm] 0/3 Unionfs updates/fixes/cleanups

2008-01-02 Thread Erez Zadok

The following is a series of patchsets related to Unionfs.  This is the
third set of patchsets resulting from an lkml review of the entire unionfs
code base.  The most significant change here is a locking/race bugfix during
dentry revalidation.

These patches were tested (where appropriate) on Linus's 2.6.24 latest code
(as of v2.6.24-rc6-179-gb8c9a18), MM, as well as the backports to
2.6.{23,22,21,20,19,18,9} on ext2/3/4, xfs, reiserfs, nfs2/3/4, jffs2,
ramfs, tmpfs, cramfs, and squashfs (where available).  Also tested with
LTP-full.  See http://unionfs.filesystems.org/ to download back-ported
unionfs code.

Please pull from the 'master' branch of
git://git.kernel.org/pub/scm/linux/kernel/git/ezk/unionfs.git

to receive the following:

Erez Zadok (3):
  Unionfs: use printk KERN_CONT for debugging messages
  Unionfs: locking fixes
  Unionfs: use VFS helpers to manipulate i_nlink

 debug.c  |   50 ++
 dentry.c |   13 -
 fanout.h |3 ++-
 unlink.c |2 +-
 4 files changed, 41 insertions(+), 27 deletions(-)

---
Erez Zadok
[EMAIL PROTECTED]
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] ext2: remove unused ext2_put_inode prototype

2008-01-02 Thread Christoph Hellwig

Signed-off-by: Christoph Hellwig <[EMAIL PROTECTED]>

Index: linux-2.6/fs/ext2/ext2.h
===
--- linux-2.6.orig/fs/ext2/ext2.h   2007-12-31 10:12:12.0 +0100
+++ linux-2.6/fs/ext2/ext2.h2007-12-31 10:12:15.0 +0100
@@ -126,7 +126,6 @@ extern unsigned long ext2_count_free (st
 /* inode.c */
 extern void ext2_read_inode (struct inode *);
 extern int ext2_write_inode (struct inode *, int);
-extern void ext2_put_inode (struct inode *);
 extern void ext2_delete_inode (struct inode *);
 extern int ext2_sync_inode (struct inode *);
 extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] ufs: remove unneeded ufs_put_inode prototype

2008-01-02 Thread Christoph Hellwig

Signed-off-by: Christoph Hellwig <[EMAIL PROTECTED]>

Index: linux-2.6/fs/ufs/ufs.h
===
--- linux-2.6.orig/fs/ufs/ufs.h 2007-12-31 10:12:42.0 +0100
+++ linux-2.6/fs/ufs/ufs.h  2007-12-31 10:12:46.0 +0100
@@ -107,7 +107,6 @@ extern struct inode * ufs_new_inode (str
 
 /* inode.c */
 extern void ufs_read_inode (struct inode *);
-extern void ufs_put_inode (struct inode *);
 extern int ufs_write_inode (struct inode *, int);
 extern int ufs_sync_inode (struct inode *);
 extern void ufs_delete_inode (struct inode *);
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ANNOUNCE] util-linux-ng 2.13.1-rc2

2008-01-02 Thread Karel Zak


 The second util-linux-ng 2.13.1 release candidate is available at

ftp://ftp.kernel.org/pub/linux/utils/util-linux-ng/

 (Note, 2.13.1 is stable maintenance release.)

 Feedback and bug reports, as always, are welcomed.

Karel


v2.13.1-rc2 Changelog:
--

blockdev:
   - add --getsz to blockdev.8  [Karel Zak]
build-sys:
   - release++ (-rc2)  [Karel Zak]
   - remove hardcoded _GNU_SOURCE  [Karel Zak]
docs:
   - add note about incorrect tag 2.13.1  [Karel Zak]
   - update AUTHORS file, add all translators  [Karel Zak]
   - update ReleaseNotes  [Karel Zak]
getopt:
   - fix path to examples in getopt.1  [Karel Zak]
hwclock:
   - check for ENODEV  [David Woodhouse]
mount:
   - don't call canonicalize(SPEC) for cifs, smbfs and nfs  [Karel Zak]
   - fix fd leak  [Matthias Koenig]
po:
   - add eu.po (from translationproject.org)  [Mikel Olasagasti]
   - add pl.po (from translationproject.org)  [Andrzej Krzysztofowicz]
   - update ca.po (from translationproject.org)  [Josep Puigdemont]
   - update cs.po (from translationproject.org)  [Petr Pisar]
   - update da.po (from translationproject.org)  [Claus Hindsgaul]
   - update de.po (from translationproject.org)  [Michael Piefel]
   - update es.po (from translationproject.org)  [Santiago Vila Doncel]
   - update et.po (from translationproject.org)  [Meelis Roos]
   - update fi.po (from translationproject.org)  [Lauri Nurmi]
   - update fr.po (from translationproject.org)  [Michel Robitaille]
   - update hu.po (from translationproject.org)  [Gabor Kelemen]
   - update id.po (from translationproject.org)  [Arif E. Nugroho]
   - update it.po (from translationproject.org)  [Marco Colombo]
   - update ja.po (from translationproject.org)  [Daisuke Yamashita]
   - update nl.po (from translationproject.org)  [Benno Schulenberg]
   - update po files  [Karel Zak]
   - update pt_BR.po (from translationproject.org)  [Rodrigo Stulzer Lopes]
   - update ru.po (from translationproject.org)  [Pavel Maryanov]
   - update sl.po (from translationproject.org)  [Simon Mihevc]
   - update sv.po (from translationproject.org)  [Daniel Nylander]
   - update tr.po (from translationproject.org)  [Nilgün Belma Bugüner]
   - update uk.po (from translationproject.org)  [Maxim V. Dziumanenko]
   - update vi.po (from translationproject.org)  [Clytie Siddall]
sfdisk:
   - allow partitioning drives of over 2^31 sectors.  [Kunihiko IMAI]



v2.13.1-rc2 diffstat:
-

 AUTHORS   |   38 +-
 NEWS  |5 +
 README.devel  |4 +
 configure.ac  |2 +-
 disk-utils/blockdev.8 |9 +-
 disk-utils/fsck.cramfs.c  |1 -
 docs/v2.13.1-ReleaseNotes |   36 +-
 fdisk/sfdisk.c|3 +-
 getopt/getopt.1   |5 +-
 hwclock/rtc.c |2 +-
 mount/lomount.c   |1 +
 mount/mount.c |   21 +-
 po/ca.po  |  258 +-
 po/cs.po  |  324 +-
 po/da.po  |  256 +-
 po/de.po  |  540 ++--
 po/es.po  |  275 +-
 po/et.po  |  441 +--
 po/eu.po  | 9466 
 po/fi.po  |  240 +-
 po/fr.po  |  281 +-
 po/hu.po  | 1735 +
 po/id.po  |  291 +-
 po/it.po  |  306 +-
 po/ja.po  |  501 +--
 po/nl.po  |  328 +-
 po/pl.po  | 9467 +
 po/pt_BR.po   |  708 ++--
 po/ru.po  |  280 +-
 po/sl.po  |  260 +-
 po/sv.po  |  291 +-
 po/tr.po  |  286 +-
 po/uk.po  |  274 +-
 po/util-linux-ng.pot  |  240 +-
 po/vi.po  |  310 +-
 schedutils/chrt.c |2 -
 schedutils/taskset.c  |2 -
 sys-utils/setarch.c   |4 -
 38 files changed, 23079 insertions(+), 4414 deletions(-)

-- 
 Karel Zak  <[EMAIL PROTECTED]>
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: lockdep warning with LTP dio test (v2.6.24-rc6-125-g5356f66)

2008-01-02 Thread Zach Brown
Erez Zadok wrote:
> Setting: ltp-full-20071031, dio01 test on ext3 with Linus's latest tree.
> Kernel w/ SMP, preemption, and lockdep configured.

This is a real lock ordering problem.  Thanks for reporting it.

The updating of atime inside sys_mmap() orders the mmap_sem in the vfs
outside of the journal handle in ext3's inode dirtying:

> -> #1 (jbd_handle){--..}:
>[] __lock_acquire+0x9cc/0xb95
>[] lock_acquire+0x5f/0x78
>[] journal_start+0xee/0xf8
>[] ext3_journal_start_sb+0x48/0x4a
>[] ext3_dirty_inode+0x27/0x6c
>[] __mark_inode_dirty+0x29/0x144
>[] touch_atime+0xb7/0xbc
>[] generic_file_mmap+0x2d/0x42
>[] mmap_region+0x1e6/0x3b4
>[] do_mmap_pgoff+0x1fb/0x253
>[] sys_mmap2+0x9b/0xb5
>[] syscall_call+0x7/0xb
>[] 0x

ext3_direct_IO() orders the journal handle outside of the mmap_sem that
dio_get_page() acquires to pin pages with get_user_pages():

> -> #0 (&mm->mmap_sem){}:
>[] __lock_acquire+0x8bc/0xb95
>[] lock_acquire+0x5f/0x78
>[] down_read+0x3a/0x4c
>[] dio_get_page+0x4e/0x15d
>[] __blockdev_direct_IO+0x431/0xa81
>[] ext3_direct_IO+0x10c/0x1a1
>[] generic_file_direct_IO+0x124/0x139
>[] generic_file_direct_write+0x56/0x11c
>[] __generic_file_aio_write_nolock+0x33d/0x489
>[] generic_file_aio_write+0x58/0xb6
>[] ext3_file_write+0x27/0x99
>[] do_sync_write+0xc5/0x102
>[] vfs_write+0x90/0x119
>[] sys_write+0x3d/0x61
>[] sysenter_past_esp+0x5f/0xa5
>[] 0x

Two fixes come to mind:

1) use something like Peter's ->mmap_prepare() to update atime before
acquiring the mmap_sem.  ( http://lkml.org/lkml/2007/11/11/97 ).  I
don't know if this would leave more paths which do a journal_start()
while holding the mmap_sem.

2) rework ext3's dio to only hold the jbd handle in ext3_get_block().
Chris has a patch for this kicking around somewhere but I'm told it has
problems exposing old blocks in ordered data mode.

Does anyone have preferences?  I could go either way.  I certainly don't
like the idea of journal handles being held across the entirety of
fs/direct-io.c.  It's yet another case of O_DIRECT differing wildly from
the buffered path :(.

- z
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html