Replace the global part of the lglock with a percpu-rwsem.

Since flc_lock is a spinlock, and itself nests under i_lock, which is
also a spinlock, we cannot acquire sleeping locks in
locks_{insert,delete}_global_locks().

We can, however, wrap all flc_lock acquisitions with
percpu_down_read() such that all invocations of
locks_{insert,delete}_global_locks() have that read lock held.

This allows us to replace the lg_global part of the lglock with the
write side of the rwsem.

In the absence of writers, percpu_{down,up}_read() are free of atomic
instructions. This further avoids the very long preempt-disable
regions caused by lglock on larger machines.

Cc: Al Viro <v...@zeniv.linux.org.uk>
Cc: Oleg Nesterov <o...@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
---
 fs/locks.c |   18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

--- a/fs/locks.c
+++ b/fs/locks.c
@@ -165,6 +165,7 @@ int lease_break_time = 45;
  */
 DEFINE_STATIC_LGLOCK(file_lock_lglock);
 static DEFINE_PER_CPU(struct hlist_head, file_lock_list);
+static struct percpu_rw_semaphore file_rwsem;
 
 /*
  * The blocked_hash is used to find POSIX lock loops for deadlock detection.
@@ -556,6 +557,8 @@ static int posix_same_owner(struct file_
 /* Must be called with the flc_lock held! */
 static void locks_insert_global_locks(struct file_lock *fl)
 {
+       lockdep_assert_held_percpu_rwsem(&file_rwsem);
+
        lg_local_lock(&file_lock_lglock);
        fl->fl_link_cpu = smp_processor_id();
        hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list));
@@ -565,6 +568,8 @@ static void locks_insert_global_locks(st
 /* Must be called with the flc_lock held! */
 static void locks_delete_global_locks(struct file_lock *fl)
 {
+       lockdep_assert_held_percpu_rwsem(&file_rwsem);
+
        /*
         * Avoid taking lock if already unhashed. This is safe since this check
         * is done while holding the flc_lock, and new insertions into the list
@@ -885,6 +890,7 @@ static int flock_lock_file(struct file *
                        return -ENOMEM;
        }
 
+       percpu_down_read(&file_rwsem);
        spin_lock(&ctx->flc_lock);
        if (request->fl_flags & FL_ACCESS)
                goto find_conflict;
@@ -925,6 +931,7 @@ static int flock_lock_file(struct file *
 
 out:
        spin_unlock(&ctx->flc_lock);
+       percpu_up_read(&file_rwsem);
        if (new_fl)
                locks_free_lock(new_fl);
        locks_dispose_list(&dispose);
@@ -960,6 +967,7 @@ static int __posix_lock_file(struct inod
                new_fl2 = locks_alloc_lock();
        }
 
+       percpu_down_read(&file_rwsem);
        spin_lock(&ctx->flc_lock);
        /*
         * New lock request. Walk all POSIX locks and look for conflicts. If
@@ -1131,6 +1139,7 @@ static int __posix_lock_file(struct inod
        }
  out:
        spin_unlock(&ctx->flc_lock);
+       percpu_up_read(&file_rwsem);
        /*
         * Free any unused locks.
         */
@@ -1407,6 +1416,7 @@ int __break_lease(struct inode *inode, u
                return error;
        }
 
+       percpu_down_read(&file_rwsem);
        spin_lock(&ctx->flc_lock);
 
        time_out_leases(inode, &dispose);
@@ -1477,6 +1487,7 @@ int __break_lease(struct inode *inode, u
        }
 out:
        spin_unlock(&ctx->flc_lock);
+       percpu_up_read(&file_rwsem);
        locks_dispose_list(&dispose);
        locks_free_lock(new_fl);
        return error;
@@ -1630,6 +1641,7 @@ generic_add_lease(struct file *filp, lon
                return -EINVAL;
        }
 
+       percpu_down_read(&file_rwsem);
        spin_lock(&ctx->flc_lock);
        time_out_leases(inode, &dispose);
        error = check_conflicting_open(dentry, arg, lease->fl_flags);
@@ -1700,6 +1712,7 @@ generic_add_lease(struct file *filp, lon
                lease->fl_lmops->lm_setup(lease, priv);
 out:
        spin_unlock(&ctx->flc_lock);
+       percpu_up_read(&file_rwsem);
        locks_dispose_list(&dispose);
        if (is_deleg)
                mutex_unlock(&inode->i_mutex);
@@ -1722,6 +1735,7 @@ static int generic_delete_lease(struct f
                return error;
        }
 
+       percpu_down_read(&file_rwsem);
        spin_lock(&ctx->flc_lock);
        list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
                if (fl->fl_file == filp &&
@@ -1734,6 +1748,7 @@ static int generic_delete_lease(struct f
        if (victim)
                error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
        spin_unlock(&ctx->flc_lock);
+       percpu_up_read(&file_rwsem);
        locks_dispose_list(&dispose);
        return error;
 }
@@ -2634,6 +2649,7 @@ static void *locks_start(struct seq_file
        struct locks_iterator *iter = f->private;
 
        iter->li_pos = *pos + 1;
+       percpu_down_write(&file_rwsem);
        lg_global_lock(&file_lock_lglock);
        spin_lock(&blocked_lock_lock);
        return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos);
@@ -2652,6 +2668,7 @@ static void locks_stop(struct seq_file *
 {
        spin_unlock(&blocked_lock_lock);
        lg_global_unlock(&file_lock_lglock);
+       percpu_up_write(&file_rwsem);
 }
 
 static const struct seq_operations locks_seq_operations = {
@@ -2693,6 +2710,7 @@ static int __init filelock_init(void)
                        sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
 
        lg_lock_init(&file_lock_lglock, "file_lock_lglock");
+       percpu_init_rwsem(&file_rwsem);
 
        for_each_possible_cpu(i)
                INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i));


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to