There are a number of iffy bits in the mutex code because mutex::count
and mutex::owner are two separate fields; this is also the reason
MUTEX_SPIN_ON_OWNER and DEBUG_MUTEXES are mutually exclusive.

Cure this by folding them into a single atomic_long_t field.

This necessarily kills all the architecture-specific mutex code.
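
For illustration, a minimal user-space sketch of the resulting owner-word
encoding (the helper names mirror the ones introduced below; task_struct is
replaced by an opaque stand-in and the pointer is just a suitably aligned
dummy object, so this is a sketch of the scheme rather than the kernel code):

  /*
   * The owner word holds the task_struct pointer of the owning task;
   * task_struct is at least 4-byte aligned, so the low two bits are
   * free to carry state such as "waiters present".
   */
  #include <stdio.h>

  #define MUTEX_FLAG_WAITERS      0x01UL
  #define MUTEX_FLAGS             0x03UL

  struct task_struct;                     /* opaque stand-in */

  static struct task_struct *owner_task(unsigned long owner)
  {
          return (struct task_struct *)(owner & ~MUTEX_FLAGS);
  }

  static unsigned long owner_flags(unsigned long owner)
  {
          return owner & MUTEX_FLAGS;
  }

  int main(void)
  {
          static int dummy_task __attribute__((aligned(4)));
          unsigned long owner = (unsigned long)&dummy_task;

          owner |= MUTEX_FLAG_WAITERS;    /* mark "waiters present" */

          printf("task:  %p\n", (void *)owner_task(owner));
          printf("flags: %#lx\n", owner_flags(owner));
          return 0;
  }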

Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
---
 arch/alpha/include/asm/mutex.h      |    9 -
 arch/arc/include/asm/mutex.h        |   18 --
 arch/arm/include/asm/mutex.h        |   21 --
 arch/arm64/include/asm/Kbuild       |    1 
 arch/avr32/include/asm/mutex.h      |    9 -
 arch/blackfin/include/asm/Kbuild    |    1 
 arch/c6x/include/asm/mutex.h        |    6 
 arch/cris/include/asm/mutex.h       |    9 -
 arch/frv/include/asm/mutex.h        |    9 -
 arch/h8300/include/asm/mutex.h      |    9 -
 arch/hexagon/include/asm/mutex.h    |    8 
 arch/ia64/include/asm/mutex.h       |   90 ----------
 arch/m32r/include/asm/mutex.h       |    9 -
 arch/m68k/include/asm/Kbuild        |    1 
 arch/metag/include/asm/Kbuild       |    1 
 arch/microblaze/include/asm/mutex.h |    1 
 arch/mips/include/asm/Kbuild        |    1 
 arch/mn10300/include/asm/mutex.h    |   16 -
 arch/nios2/include/asm/mutex.h      |    1 
 arch/openrisc/include/asm/mutex.h   |   27 ---
 arch/parisc/include/asm/Kbuild      |    1 
 arch/powerpc/include/asm/mutex.h    |  132 ---------------
 arch/s390/include/asm/mutex.h       |    9 -
 arch/score/include/asm/mutex.h      |    6 
 arch/sh/include/asm/mutex-llsc.h    |  109 ------------
 arch/sh/include/asm/mutex.h         |   12 -
 arch/sparc/include/asm/Kbuild       |    1 
 arch/tile/include/asm/Kbuild        |    1 
 arch/um/include/asm/Kbuild          |    1 
 arch/unicore32/include/asm/mutex.h  |   20 --
 arch/x86/include/asm/mutex.h        |    5 
 arch/x86/include/asm/mutex_32.h     |  110 ------------
 arch/x86/include/asm/mutex_64.h     |  127 --------------
 arch/xtensa/include/asm/mutex.h     |    9 -
 include/asm-generic/mutex-dec.h     |   88 ----------
 include/asm-generic/mutex-null.h    |   19 --
 include/asm-generic/mutex-xchg.h    |  120 --------------
 include/asm-generic/mutex.h         |    9 -
 include/linux/mutex-debug.h         |   24 --
 include/linux/mutex.h               |   46 +++--
 kernel/locking/mutex-debug.c        |   13 -
 kernel/locking/mutex-debug.h        |   10 -
 kernel/locking/mutex.c              |  307 ++++++++++++++----------------------
 kernel/locking/mutex.h              |   26 ---
 kernel/sched/core.c                 |    2 
 45 files changed, 155 insertions(+), 1299 deletions(-)

--- a/arch/alpha/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/arch/arc/include/asm/mutex.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * xchg() based mutex fast path maintains a state of 0 or 1, as opposed to
- * atomic dec based which can "count" any number of lock contenders.
- * This ideally needs to be fixed in core, but for now switching to dec ver.
- */
-#if defined(CONFIG_SMP) && (CONFIG_NR_CPUS > 2)
-#include <asm-generic/mutex-dec.h>
-#else
-#include <asm-generic/mutex-xchg.h>
-#endif
--- a/arch/arm/include/asm/mutex.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * arch/arm/include/asm/mutex.h
- *
- * ARM optimized mutex locking primitives
- *
- * Please look into asm-generic/mutex-xchg.h for a formal definition.
- */
-#ifndef _ASM_MUTEX_H
-#define _ASM_MUTEX_H
-/*
- * On pre-ARMv6 hardware this results in a swp-based implementation,
- * which is the most efficient. For ARMv6+, we have exclusive memory
- * accessors and use atomic_dec to avoid the extra xchg operations
- * on the locking slowpaths.
- */
-#if __LINUX_ARM_ARCH__ < 6
-#include <asm-generic/mutex-xchg.h>
-#else
-#include <asm-generic/mutex-dec.h>
-#endif
-#endif /* _ASM_MUTEX_H */
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -26,7 +26,6 @@ generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += msi.h
-generic-y += mutex.h
 generic-y += pci.h
 generic-y += poll.h
 generic-y += preempt.h
--- a/arch/avr32/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -24,7 +24,6 @@ generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += msgbuf.h
-generic-y += mutex.h
 generic-y += param.h
 generic-y += percpu.h
 generic-y += pgalloc.h
--- a/arch/c6x/include/asm/mutex.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_C6X_MUTEX_H
-#define _ASM_C6X_MUTEX_H
-
-#include <asm-generic/mutex-null.h>
-
-#endif /* _ASM_C6X_MUTEX_H */
--- a/arch/cris/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/arch/frv/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/arch/h8300/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/arch/hexagon/include/asm/mutex.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-#include <asm-generic/mutex-xchg.h>
--- a/arch/ia64/include/asm/mutex.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * ia64 implementation of the mutex fastpath.
- *
- * Copyright (C) 2006 Ken Chen <kenneth.w.c...@intel.com>
- *
- */
-
-#ifndef _ASM_MUTEX_H
-#define _ASM_MUTEX_H
-
-/**
- *  __mutex_fastpath_lock - try to take the lock by moving the count
- *                          from 1 to a 0 value
- *  @count: pointer of type atomic_t
- *  @fail_fn: function to call if the original value was not 1
- *
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function MUST leave the value lower than
- * 1 even when the "1" assertion wasn't true.
- */
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-       if (unlikely(ia64_fetchadd4_acq(count, -1) != 1))
-               fail_fn(count);
-}
-
-/**
- *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
- *                                 from 1 to a 0 value
- *  @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count)
-{
-       if (unlikely(ia64_fetchadd4_acq(count, -1) != 1))
-               return -1;
-       return 0;
-}
-
-/**
- *  __mutex_fastpath_unlock - try to promote the count from 0 to 1
- *  @count: pointer of type atomic_t
- *  @fail_fn: function to call if the original value was not 0
- *
- * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>.
- * In the failure case, this function is allowed to either set the value to
- * 1, or to set it to a value lower than 1.
- *
- * If the implementation sets it to a value of lower than 1, then the
- * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs
- * to return 0 otherwise.
- */
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-       int ret = ia64_fetchadd4_rel(count, 1);
-       if (unlikely(ret < 0))
-               fail_fn(count);
-}
-
-#define __mutex_slowpath_needs_to_unlock()             1
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- *  @count: pointer of type atomic_t
- *  @fail_fn: fallback function
- *
- * Change the count from 1 to a value lower than 1, and return 0 (failure)
- * if it wasn't 1 originally, or return 1 (success) otherwise. This function
- * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
- * Additionally, if the value was < 0 originally, this function must not leave
- * it to 0 on failure.
- *
- * If the architecture has no effective trylock variant, it should call the
- * <fail_fn> spinlock-based trylock variant unconditionally.
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
-       if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1)
-               return 1;
-       return 0;
-}
-
-#endif
--- a/arch/m32r/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -20,7 +20,6 @@ generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += mman.h
-generic-y += mutex.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += resource.h
--- a/arch/metag/include/asm/Kbuild
+++ b/arch/metag/include/asm/Kbuild
@@ -27,7 +27,6 @@ generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += msgbuf.h
-generic-y += mutex.h
 generic-y += param.h
 generic-y += pci.h
 generic-y += percpu.h
--- a/arch/microblaze/include/asm/mutex.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/mutex-dec.h>
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -9,7 +9,6 @@ generic-y += irq_work.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mutex.h
 generic-y += parport.h
 generic-y += percpu.h
 generic-y += preempt.h
--- a/arch/mn10300/include/asm/mutex.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* MN10300 Mutex fastpath
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowe...@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- *
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-#include <asm-generic/mutex-null.h>
--- a/arch/nios2/include/asm/mutex.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/mutex-dec.h>
--- a/arch/openrisc/include/asm/mutex.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * OpenRISC Linux
- *
- * Linux architectural port borrowing liberally from similar works of
- * others.  All original copyrights apply as per the original source
- * declaration.
- *
- * OpenRISC implementation:
- * Copyright (C) 2003 Matjaz Breskvar <phoe...@bsemi.com>
- * Copyright (C) 2010-2011 Jonas Bonn <jo...@southpole.se>
- * et al.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -16,7 +16,6 @@ generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mutex.h
 generic-y += param.h
 generic-y += percpu.h
 generic-y += poll.h
--- a/arch/powerpc/include/asm/mutex.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Optimised mutex implementation of include/asm-generic/mutex-dec.h algorithm
- */
-#ifndef _ASM_POWERPC_MUTEX_H
-#define _ASM_POWERPC_MUTEX_H
-
-static inline int __mutex_cmpxchg_lock(atomic_t *v, int old, int new)
-{
-       int t;
-
-       __asm__ __volatile__ (
-"1:    lwarx   %0,0,%1         # mutex trylock\n\
-       cmpw    0,%0,%2\n\
-       bne-    2f\n"
-       PPC405_ERR77(0,%1)
-"      stwcx.  %3,0,%1\n\
-       bne-    1b"
-       PPC_ACQUIRE_BARRIER
-       "\n\
-2:"
-       : "=&r" (t)
-       : "r" (&v->counter), "r" (old), "r" (new)
-       : "cc", "memory");
-
-       return t;
-}
-
-static inline int __mutex_dec_return_lock(atomic_t *v)
-{
-       int t;
-
-       __asm__ __volatile__(
-"1:    lwarx   %0,0,%1         # mutex lock\n\
-       addic   %0,%0,-1\n"
-       PPC405_ERR77(0,%1)
-"      stwcx.  %0,0,%1\n\
-       bne-    1b"
-       PPC_ACQUIRE_BARRIER
-       : "=&r" (t)
-       : "r" (&v->counter)
-       : "cc", "memory");
-
-       return t;
-}
-
-static inline int __mutex_inc_return_unlock(atomic_t *v)
-{
-       int t;
-
-       __asm__ __volatile__(
-       PPC_RELEASE_BARRIER
-"1:    lwarx   %0,0,%1         # mutex unlock\n\
-       addic   %0,%0,1\n"
-       PPC405_ERR77(0,%1)
-"      stwcx.  %0,0,%1 \n\
-       bne-    1b"
-       : "=&r" (t)
-       : "r" (&v->counter)
-       : "cc", "memory");
-
-       return t;
-}
-
-/**
- *  __mutex_fastpath_lock - try to take the lock by moving the count
- *                          from 1 to a 0 value
- *  @count: pointer of type atomic_t
- *  @fail_fn: function to call if the original value was not 1
- *
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function MUST leave the value lower than
- * 1 even when the "1" assertion wasn't true.
- */
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-       if (unlikely(__mutex_dec_return_lock(count) < 0))
-               fail_fn(count);
-}
-
-/**
- *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
- *                                 from 1 to a 0 value
- *  @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count)
-{
-       if (unlikely(__mutex_dec_return_lock(count) < 0))
-               return -1;
-       return 0;
-}
-
-/**
- *  __mutex_fastpath_unlock - try to promote the count from 0 to 1
- *  @count: pointer of type atomic_t
- *  @fail_fn: function to call if the original value was not 0
- *
- * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>.
- * In the failure case, this function is allowed to either set the value to
- * 1, or to set it to a value lower than 1.
- */
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-       if (unlikely(__mutex_inc_return_unlock(count) <= 0))
-               fail_fn(count);
-}
-
-#define __mutex_slowpath_needs_to_unlock()             1
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- *  @count: pointer of type atomic_t
- *  @fail_fn: fallback function
- *
- * Change the count from 1 to 0, and return 1 (success), or if the count
- * was not 1, then return 0 (failure).
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
-       if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1))
-               return 1;
-       return 0;
-}
-
-#endif
--- a/arch/s390/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/arch/score/include/asm/mutex.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SCORE_MUTEX_H
-#define _ASM_SCORE_MUTEX_H
-
-#include <asm-generic/mutex-dec.h>
-
-#endif /* _ASM_SCORE_MUTEX_H */
--- a/arch/sh/include/asm/mutex-llsc.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * arch/sh/include/asm/mutex-llsc.h
- *
- * SH-4A optimized mutex locking primitives
- *
- * Please look into asm-generic/mutex-xchg.h for a formal definition.
- */
-#ifndef __ASM_SH_MUTEX_LLSC_H
-#define __ASM_SH_MUTEX_LLSC_H
-
-/*
- * Attempting to lock a mutex on SH4A is done like in ARMv6+ architecure.
- * with a bastardized atomic decrement (it is not a reliable atomic decrement
- * but it satisfies the defined semantics for our purpose, while being
- * smaller and faster than a real atomic decrement or atomic swap.
- * The idea is to attempt  decrementing the lock value only once. If once
- * decremented it isn't zero, or if its store-back fails due to a dispute
- * on the exclusive store, we simply bail out immediately through the slow
- * path where the lock will be reattempted until it succeeds.
- */
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-       int __done, __res;
-
-       __asm__ __volatile__ (
-               "movli.l        @%2, %0 \n"
-               "add            #-1, %0 \n"
-               "movco.l        %0, @%2 \n"
-               "movt           %1      \n"
-               : "=&z" (__res), "=&r" (__done)
-               : "r" (&(count)->counter)
-               : "t");
-
-       if (unlikely(!__done || __res != 0))
-               fail_fn(count);
-}
-
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count)
-{
-       int __done, __res;
-
-       __asm__ __volatile__ (
-               "movli.l        @%2, %0 \n"
-               "add            #-1, %0 \n"
-               "movco.l        %0, @%2 \n"
-               "movt           %1      \n"
-               : "=&z" (__res), "=&r" (__done)
-               : "r" (&(count)->counter)
-               : "t");
-
-       if (unlikely(!__done || __res != 0))
-               __res = -1;
-
-       return __res;
-}
-
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-       int __done, __res;
-
-       __asm__ __volatile__ (
-               "movli.l        @%2, %0 \n\t"
-               "add            #1, %0  \n\t"
-               "movco.l        %0, @%2 \n\t"
-               "movt           %1      \n\t"
-               : "=&z" (__res), "=&r" (__done)
-               : "r" (&(count)->counter)
-               : "t");
-
-       if (unlikely(!__done || __res <= 0))
-               fail_fn(count);
-}
-
-/*
- * If the unlock was done on a contended lock, or if the unlock simply fails
- * then the mutex remains locked.
- */
-#define __mutex_slowpath_needs_to_unlock()     1
-
-/*
- * For __mutex_fastpath_trylock we do an atomic decrement and check the
- * result and put it in the __res variable.
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
-       int __res, __orig;
-
-       __asm__ __volatile__ (
-               "1: movli.l     @%2, %0         \n\t"
-               "dt             %0              \n\t"
-               "movco.l        %0,@%2          \n\t"
-               "bf             1b              \n\t"
-               "cmp/eq         #0,%0           \n\t"
-               "bt             2f              \n\t"
-               "mov            #0, %1          \n\t"
-               "bf             3f              \n\t"
-               "2: mov         #1, %1          \n\t"
-               "3:                             "
-               : "=&z" (__orig), "=&r" (__res)
-               : "r" (&count->counter)
-               : "t");
-
-       return __res;
-}
-#endif /* __ASM_SH_MUTEX_LLSC_H */
--- a/arch/sh/include/asm/mutex.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-#if defined(CONFIG_CPU_SH4A)
-#include <asm/mutex-llsc.h>
-#else
-#include <asm-generic/mutex-dec.h>
-#endif
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -14,7 +14,6 @@ generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += module.h
-generic-y += mutex.h
 generic-y += preempt.h
 generic-y += rwsem.h
 generic-y += serial.h
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -21,7 +21,6 @@ generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += msgbuf.h
-generic-y += mutex.h
 generic-y += param.h
 generic-y += parport.h
 generic-y += poll.h
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -17,7 +17,6 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mutex.h
 generic-y += param.h
 generic-y += pci.h
 generic-y += percpu.h
--- a/arch/unicore32/include/asm/mutex.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * linux/arch/unicore32/include/asm/mutex.h
- *
- * Code specific to PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2010 GUAN Xue-tao
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * UniCore optimized mutex locking primitives
- *
- * Please look into asm-generic/mutex-xchg.h for a formal definition.
- */
-#ifndef __UNICORE_MUTEX_H__
-#define __UNICORE_MUTEX_H__
-
-# include <asm-generic/mutex-xchg.h>
-#endif
--- a/arch/x86/include/asm/mutex.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifdef CONFIG_X86_32
-# include <asm/mutex_32.h>
-#else
-# include <asm/mutex_64.h>
-#endif
--- a/arch/x86/include/asm/mutex_32.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Assembly implementation of the mutex fastpath, based on atomic
- * decrement/increment.
- *
- * started by Ingo Molnar:
- *
- *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mi...@redhat.com>
- */
-#ifndef _ASM_X86_MUTEX_32_H
-#define _ASM_X86_MUTEX_32_H
-
-#include <asm/alternative.h>
-
-/**
- *  __mutex_fastpath_lock - try to take the lock by moving the count
- *                          from 1 to a 0 value
- *  @count: pointer of type atomic_t
- *  @fn: function to call if the original value was not 1
- *
- * Change the count from 1 to a value lower than 1, and call <fn> if it
- * wasn't 1 originally. This function MUST leave the value lower than 1
- * even when the "1" assertion wasn't true.
- */
-#define __mutex_fastpath_lock(count, fail_fn)                  \
-do {                                                           \
-       unsigned int dummy;                                     \
-                                                               \
-       typecheck(atomic_t *, count);                           \
-       typecheck_fn(void (*)(atomic_t *), fail_fn);            \
-                                                               \
-       asm volatile(LOCK_PREFIX "   decl (%%eax)\n"            \
-                    "   jns 1f \n"                             \
-                    "   call " #fail_fn "\n"                   \
-                    "1:\n"                                     \
-                    : "=a" (dummy)                             \
-                    : "a" (count)                              \
-                    : "memory", "ecx", "edx");                 \
-} while (0)
-
-
-/**
- *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
- *                                 from 1 to a 0 value
- *  @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int __mutex_fastpath_lock_retval(atomic_t *count)
-{
-       if (unlikely(atomic_dec_return(count) < 0))
-               return -1;
-       else
-               return 0;
-}
-
-/**
- *  __mutex_fastpath_unlock - try to promote the mutex from 0 to 1
- *  @count: pointer of type atomic_t
- *  @fail_fn: function to call if the original value was not 0
- *
- * try to promote the mutex from 0 to 1. if it wasn't 0, call <fail_fn>.
- * In the failure case, this function is allowed to either set the value
- * to 1, or to set it to a value lower than 1.
- *
- * If the implementation sets it to a value of lower than 1, the
- * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs
- * to return 0 otherwise.
- */
-#define __mutex_fastpath_unlock(count, fail_fn)                        \
-do {                                                           \
-       unsigned int dummy;                                     \
-                                                               \
-       typecheck(atomic_t *, count);                           \
-       typecheck_fn(void (*)(atomic_t *), fail_fn);            \
-                                                               \
-       asm volatile(LOCK_PREFIX "   incl (%%eax)\n"            \
-                    "   jg     1f\n"                           \
-                    "   call " #fail_fn "\n"                   \
-                    "1:\n"                                     \
-                    : "=a" (dummy)                             \
-                    : "a" (count)                              \
-                    : "memory", "ecx", "edx");                 \
-} while (0)
-
-#define __mutex_slowpath_needs_to_unlock()     1
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- *  @count: pointer of type atomic_t
- *  @fail_fn: fallback function
- *
- * Change the count from 1 to a value lower than 1, and return 0 (failure)
- * if it wasn't 1 originally, or return 1 (success) otherwise. This function
- * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
- * Additionally, if the value was < 0 originally, this function must not leave
- * it to 0 on failure.
- */
-static inline int __mutex_fastpath_trylock(atomic_t *count,
-                                          int (*fail_fn)(atomic_t *))
-{
-       /* cmpxchg because it never induces a false contention state. */
-       if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
-               return 1;
-
-       return 0;
-}
-
-#endif /* _ASM_X86_MUTEX_32_H */
--- a/arch/x86/include/asm/mutex_64.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Assembly implementation of the mutex fastpath, based on atomic
- * decrement/increment.
- *
- * started by Ingo Molnar:
- *
- *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mi...@redhat.com>
- */
-#ifndef _ASM_X86_MUTEX_64_H
-#define _ASM_X86_MUTEX_64_H
-
-/**
- * __mutex_fastpath_lock - decrement and call function if negative
- * @v: pointer of type atomic_t
- * @fail_fn: function to call if the result is negative
- *
- * Atomically decrements @v and calls <fail_fn> if the result is negative.
- */
-#ifdef CC_HAVE_ASM_GOTO
-static inline void __mutex_fastpath_lock(atomic_t *v,
-                                        void (*fail_fn)(atomic_t *))
-{
-       asm_volatile_goto(LOCK_PREFIX "   decl %0\n"
-                         "   jns %l[exit]\n"
-                         : : "m" (v->counter)
-                         : "memory", "cc"
-                         : exit);
-       fail_fn(v);
-exit:
-       return;
-}
-#else
-#define __mutex_fastpath_lock(v, fail_fn)                      \
-do {                                                           \
-       unsigned long dummy;                                    \
-                                                               \
-       typecheck(atomic_t *, v);                               \
-       typecheck_fn(void (*)(atomic_t *), fail_fn);            \
-                                                               \
-       asm volatile(LOCK_PREFIX "   decl (%%rdi)\n"            \
-                    "   jns 1f         \n"                     \
-                    "   call " #fail_fn "\n"                   \
-                    "1:"                                       \
-                    : "=D" (dummy)                             \
-                    : "D" (v)                                  \
-                    : "rax", "rsi", "rdx", "rcx",              \
-                      "r8", "r9", "r10", "r11", "memory");     \
-} while (0)
-#endif
-
-/**
- *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
- *                                 from 1 to a 0 value
- *  @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int __mutex_fastpath_lock_retval(atomic_t *count)
-{
-       if (unlikely(atomic_dec_return(count) < 0))
-               return -1;
-       else
-               return 0;
-}
-
-/**
- * __mutex_fastpath_unlock - increment and call function if nonpositive
- * @v: pointer of type atomic_t
- * @fail_fn: function to call if the result is nonpositive
- *
- * Atomically increments @v and calls <fail_fn> if the result is nonpositive.
- */
-#ifdef CC_HAVE_ASM_GOTO
-static inline void __mutex_fastpath_unlock(atomic_t *v,
-                                          void (*fail_fn)(atomic_t *))
-{
-       asm_volatile_goto(LOCK_PREFIX "   incl %0\n"
-                         "   jg %l[exit]\n"
-                         : : "m" (v->counter)
-                         : "memory", "cc"
-                         : exit);
-       fail_fn(v);
-exit:
-       return;
-}
-#else
-#define __mutex_fastpath_unlock(v, fail_fn)                    \
-do {                                                           \
-       unsigned long dummy;                                    \
-                                                               \
-       typecheck(atomic_t *, v);                               \
-       typecheck_fn(void (*)(atomic_t *), fail_fn);            \
-                                                               \
-       asm volatile(LOCK_PREFIX "   incl (%%rdi)\n"            \
-                    "   jg 1f\n"                               \
-                    "   call " #fail_fn "\n"                   \
-                    "1:"                                       \
-                    : "=D" (dummy)                             \
-                    : "D" (v)                                  \
-                    : "rax", "rsi", "rdx", "rcx",              \
-                      "r8", "r9", "r10", "r11", "memory");     \
-} while (0)
-#endif
-
-#define __mutex_slowpath_needs_to_unlock()     1
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- *  @count: pointer of type atomic_t
- *  @fail_fn: fallback function
- *
- * Change the count from 1 to 0 and return 1 (success), or return 0 (failure)
- * if it wasn't 1 originally. [the fallback function is never used on
- * x86_64, because all x86_64 CPUs have a CMPXCHG instruction.]
- */
-static inline int __mutex_fastpath_trylock(atomic_t *count,
-                                          int (*fail_fn)(atomic_t *))
-{
-       if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
-               return 1;
-
-       return 0;
-}
-
-#endif /* _ASM_X86_MUTEX_64_H */
--- a/arch/xtensa/include/asm/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
--- a/include/asm-generic/mutex-dec.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * include/asm-generic/mutex-dec.h
- *
- * Generic implementation of the mutex fastpath, based on atomic
- * decrement/increment.
- */
-#ifndef _ASM_GENERIC_MUTEX_DEC_H
-#define _ASM_GENERIC_MUTEX_DEC_H
-
-/**
- *  __mutex_fastpath_lock - try to take the lock by moving the count
- *                          from 1 to a 0 value
- *  @count: pointer of type atomic_t
- *  @fail_fn: function to call if the original value was not 1
- *
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function MUST leave the value lower than
- * 1 even when the "1" assertion wasn't true.
- */
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-       if (unlikely(atomic_dec_return_acquire(count) < 0))
-               fail_fn(count);
-}
-
-/**
- *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
- *                                 from 1 to a 0 value
- *  @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count)
-{
-       if (unlikely(atomic_dec_return_acquire(count) < 0))
-               return -1;
-       return 0;
-}
-
-/**
- *  __mutex_fastpath_unlock - try to promote the count from 0 to 1
- *  @count: pointer of type atomic_t
- *  @fail_fn: function to call if the original value was not 0
- *
- * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>.
- * In the failure case, this function is allowed to either set the value to
- * 1, or to set it to a value lower than 1.
- *
- * If the implementation sets it to a value of lower than 1, then the
- * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs
- * to return 0 otherwise.
- */
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-       if (unlikely(atomic_inc_return_release(count) <= 0))
-               fail_fn(count);
-}
-
-#define __mutex_slowpath_needs_to_unlock()             1
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- *  @count: pointer of type atomic_t
- *  @fail_fn: fallback function
- *
- * Change the count from 1 to a value lower than 1, and return 0 (failure)
- * if it wasn't 1 originally, or return 1 (success) otherwise. This function
- * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
- * Additionally, if the value was < 0 originally, this function must not leave
- * it to 0 on failure.
- *
- * If the architecture has no effective trylock variant, it should call the
- * <fail_fn> spinlock-based trylock variant unconditionally.
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
-       if (likely(atomic_read(count) == 1 && atomic_cmpxchg_acquire(count, 1, 0) == 1))
-               return 1;
-       return 0;
-}
-
-#endif
--- a/include/asm-generic/mutex-null.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * include/asm-generic/mutex-null.h
- *
- * Generic implementation of the mutex fastpath, based on NOP :-)
- *
- * This is used by the mutex-debugging infrastructure, but it can also
- * be used by architectures that (for whatever reason) want to use the
- * spinlock based slowpath.
- */
-#ifndef _ASM_GENERIC_MUTEX_NULL_H
-#define _ASM_GENERIC_MUTEX_NULL_H
-
-#define __mutex_fastpath_lock(count, fail_fn)          fail_fn(count)
-#define __mutex_fastpath_lock_retval(count)            (-1)
-#define __mutex_fastpath_unlock(count, fail_fn)                fail_fn(count)
-#define __mutex_fastpath_trylock(count, fail_fn)       fail_fn(count)
-#define __mutex_slowpath_needs_to_unlock()             1
-
-#endif
--- a/include/asm-generic/mutex-xchg.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * include/asm-generic/mutex-xchg.h
- *
- * Generic implementation of the mutex fastpath, based on xchg().
- *
- * NOTE: An xchg based implementation might be less optimal than an atomic
- *       decrement/increment based implementation. If your architecture
- *       has a reasonable atomic dec/inc then you should probably use
- *      asm-generic/mutex-dec.h instead, or you could open-code an
- *      optimized version in asm/mutex.h.
- */
-#ifndef _ASM_GENERIC_MUTEX_XCHG_H
-#define _ASM_GENERIC_MUTEX_XCHG_H
-
-/**
- *  __mutex_fastpath_lock - try to take the lock by moving the count
- *                          from 1 to a 0 value
- *  @count: pointer of type atomic_t
- *  @fail_fn: function to call if the original value was not 1
- *
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if it
- * wasn't 1 originally. This function MUST leave the value lower than 1
- * even when the "1" assertion wasn't true.
- */
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-       if (unlikely(atomic_xchg(count, 0) != 1))
-               /*
-                * We failed to acquire the lock, so mark it contended
-                * to ensure that any waiting tasks are woken up by the
-                * unlock slow path.
-                */
-               if (likely(atomic_xchg_acquire(count, -1) != 1))
-                       fail_fn(count);
-}
-
-/**
- *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
- *                                 from 1 to a 0 value
- *  @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count)
-{
-       if (unlikely(atomic_xchg_acquire(count, 0) != 1))
-               if (likely(atomic_xchg(count, -1) != 1))
-                       return -1;
-       return 0;
-}
-
-/**
- *  __mutex_fastpath_unlock - try to promote the mutex from 0 to 1
- *  @count: pointer of type atomic_t
- *  @fail_fn: function to call if the original value was not 0
- *
- * try to promote the mutex from 0 to 1. if it wasn't 0, call <function>
- * In the failure case, this function is allowed to either set the value to
- * 1, or to set it to a value lower than one.
- * If the implementation sets it to a value of lower than one, the
- * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs
- * to return 0 otherwise.
- */
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-       if (unlikely(atomic_xchg_release(count, 1) != 0))
-               fail_fn(count);
-}
-
-#define __mutex_slowpath_needs_to_unlock()             0
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- *  @count: pointer of type atomic_t
- *  @fail_fn: spinlock based trylock implementation
- *
- * Change the count from 1 to a value lower than 1, and return 0 (failure)
- * if it wasn't 1 originally, or return 1 (success) otherwise. This function
- * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
- * Additionally, if the value was < 0 originally, this function must not leave
- * it to 0 on failure.
- *
- * If the architecture has no effective trylock variant, it should call the
- * <fail_fn> spinlock-based trylock variant unconditionally.
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
-       int prev;
-
-       if (atomic_read(count) != 1)
-               return 0;
-
-       prev = atomic_xchg_acquire(count, 0);
-       if (unlikely(prev < 0)) {
-               /*
-                * The lock was marked contended so we must restore that
-                * state. If while doing so we get back a prev value of 1
-                * then we just own it.
-                *
-                * [ In the rare case of the mutex going to 1, to 0, to -1
-                *   and then back to 0 in this few-instructions window,
-                *   this has the potential to trigger the slowpath for the
-                *   owner's unlock path needlessly, but that's not a problem
-                *   in practice. ]
-                */
-               prev = atomic_xchg_acquire(count, prev);
-               if (prev < 0)
-                       prev = 0;
-       }
-
-       return prev;
-}
-
-#endif
--- a/include/asm-generic/mutex.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __ASM_GENERIC_MUTEX_H
-#define __ASM_GENERIC_MUTEX_H
-/*
- * Pull in the generic implementation for the mutex fastpath,
- * which is a reasonable default on many architectures.
- */
-
-#include <asm-generic/mutex-dec.h>
-#endif /* __ASM_GENERIC_MUTEX_H */
--- a/include/linux/mutex-debug.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef __LINUX_MUTEX_DEBUG_H
-#define __LINUX_MUTEX_DEBUG_H
-
-#include <linux/linkage.h>
-#include <linux/lockdep.h>
-#include <linux/debug_locks.h>
-
-/*
- * Mutexes - debugging helpers:
- */
-
-#define __DEBUG_MUTEX_INITIALIZER(lockname)                            \
-       , .magic = &lockname
-
-#define mutex_init(mutex)                                              \
-do {                                                                   \
-       static struct lock_class_key __key;                             \
-                                                                       \
-       __mutex_init((mutex), #mutex, &__key);                          \
-} while (0)
-
-extern void mutex_destroy(struct mutex *lock);
-
-#endif
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -18,6 +18,7 @@
 #include <linux/atomic.h>
 #include <asm/processor.h>
 #include <linux/osq_lock.h>
+#include <linux/debug_locks.h>
 
 /*
  * Simple, straightforward mutexes with strict semantics:
@@ -48,16 +49,12 @@
  *   locks and tasks (and only those tasks)
  */
 struct mutex {
-       /* 1: unlocked, 0: locked, negative: locked, possible waiters */
-       atomic_t                count;
+       atomic_long_t           owner;
        spinlock_t              wait_lock;
-       struct list_head        wait_list;
-#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
-       struct task_struct      *owner;
-#endif
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
        struct optimistic_spin_queue osq; /* Spinner MCS lock */
 #endif
+       struct list_head        wait_list;
 #ifdef CONFIG_DEBUG_MUTEXES
        void                    *magic;
 #endif
@@ -66,6 +63,11 @@ struct mutex {
 #endif
 };
 
+static inline struct task_struct *__mutex_owner(struct mutex *lock)
+{
+       return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03);
+}
+
 /*
  * This is the control structure for tasks blocked on mutex,
  * which resides on the blocked task's kernel stack:
@@ -79,9 +81,20 @@ struct mutex_waiter {
 };
 
 #ifdef CONFIG_DEBUG_MUTEXES
-# include <linux/mutex-debug.h>
+
+#define __DEBUG_MUTEX_INITIALIZER(lockname)                            \
+       , .magic = &lockname
+
+extern void mutex_destroy(struct mutex *lock);
+
 #else
+
 # define __DEBUG_MUTEX_INITIALIZER(lockname)
+
+static inline void mutex_destroy(struct mutex *lock) {}
+
+#endif
+
 /**
  * mutex_init - initialize the mutex
  * @mutex: the mutex to be initialized
@@ -90,14 +103,12 @@ struct mutex_waiter {
  *
  * It is not allowed to initialize an already locked mutex.
  */
-# define mutex_init(mutex) \
-do {                                                   \
-       static struct lock_class_key __key;             \
-                                                       \
-       __mutex_init((mutex), #mutex, &__key);          \
+#define mutex_init(mutex)                                              \
+do {                                                                   \
+       static struct lock_class_key __key;                             \
+                                                                       \
+       __mutex_init((mutex), #mutex, &__key);                          \
 } while (0)
-static inline void mutex_destroy(struct mutex *lock) {}
-#endif
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
@@ -107,7 +118,7 @@ static inline void mutex_destroy(struct
 #endif
 
 #define __MUTEX_INITIALIZER(lockname) \
-               { .count = ATOMIC_INIT(1) \
+               { .owner = ATOMIC_LONG_INIT(0) \
                , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
                , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \
                __DEBUG_MUTEX_INITIALIZER(lockname) \
@@ -127,7 +138,10 @@ extern void __mutex_init(struct mutex *l
  */
 static inline int mutex_is_locked(struct mutex *lock)
 {
-       return atomic_read(&lock->count) != 1;
+       /*
+        * XXX think about spin_is_locked
+        */
+       return __mutex_owner(lock) != NULL;
 }
 
 /*
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -73,21 +73,8 @@ void debug_mutex_unlock(struct mutex *lo
 {
        if (likely(debug_locks)) {
                DEBUG_LOCKS_WARN_ON(lock->magic != lock);
-
-               if (!lock->owner)
-                       DEBUG_LOCKS_WARN_ON(!lock->owner);
-               else
-                       DEBUG_LOCKS_WARN_ON(lock->owner != current);
-
                DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
        }
-
-       /*
-        * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug
-        * mutexes so that we can do it here after we've verified state.
-        */
-       mutex_clear_owner(lock);
-       atomic_set(&lock->count, 1);
 }
 
 void debug_mutex_init(struct mutex *lock, const char *name,
--- a/kernel/locking/mutex-debug.h
+++ b/kernel/locking/mutex-debug.h
@@ -27,16 +27,6 @@ extern void debug_mutex_unlock(struct mu
 extern void debug_mutex_init(struct mutex *lock, const char *name,
                             struct lock_class_key *key);
 
-static inline void mutex_set_owner(struct mutex *lock)
-{
-       WRITE_ONCE(lock->owner, current);
-}
-
-static inline void mutex_clear_owner(struct mutex *lock)
-{
-       WRITE_ONCE(lock->owner, NULL);
-}
-
 #define spin_lock_mutex(lock, flags)                   \
        do {                                            \
                struct mutex *l = container_of(lock, struct mutex, wait_lock); \
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -33,35 +33,90 @@
  */
 #ifdef CONFIG_DEBUG_MUTEXES
 # include "mutex-debug.h"
-# include <asm-generic/mutex-null.h>
-/*
- * Must be 0 for the debug case so we do not do the unlock outside of the
- * wait_lock region. debug_mutex_unlock() will do the actual unlock in this
- * case.
- */
-# undef __mutex_slowpath_needs_to_unlock
-# define  __mutex_slowpath_needs_to_unlock()   0
 #else
 # include "mutex.h"
-# include <asm/mutex.h>
 #endif
 
 void
 __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 {
-       atomic_set(&lock->count, 1);
+       atomic_long_set(&lock->owner, 0);
        spin_lock_init(&lock->wait_lock);
        INIT_LIST_HEAD(&lock->wait_list);
-       mutex_clear_owner(lock);
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
        osq_lock_init(&lock->osq);
 #endif
 
        debug_mutex_init(lock, name, key);
 }
-
 EXPORT_SYMBOL(__mutex_init);
 
+#define MUTEX_FLAG_WAITERS     0x01
+
+#define MUTEX_FLAGS            0x03
+
+static inline struct task_struct *__owner_task(unsigned long owner)
+{
+       return (struct task_struct *)(owner & ~MUTEX_FLAGS);
+}
+
+static inline unsigned long __owner_flags(unsigned long owner)
+{
+       return owner & MUTEX_FLAGS;
+}
+
+/*
+ * Actual trylock that will work on any unlocked state.
+ */
+static inline bool __mutex_trylock(struct mutex *lock)
+{
+       unsigned long owner, curr = (unsigned long)current;
+
+       owner = atomic_long_read(&lock->owner);
+       for (;;) { /* must loop, can race against a flag */
+               unsigned long old;
+
+               if (__owner_task(owner)) {
+                       if ((unsigned long)__owner_task(owner) == curr)
+                               return true;
+
+                       return false;
+               }
+
+               curr |= __owner_flags(owner);
+               old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr);
+               if (old == owner)
+                       return true;
+
+               owner = old;
+       }
+}
+
+/*
+ * Optimistic trylock that only works in the uncontended case. Make sure to
+ * follow with a __mutex_trylock() before failing.
+ */
+static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
+{
+       unsigned long owner, curr = (unsigned long)current;
+
+       owner = atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr);
+       if (!owner)
+               return true;
+
+       return false;
+}
+
+static inline void __mutex_set_flag(struct mutex *lock, unsigned long flag)
+{
+       atomic_long_or(flag, &lock->owner);
+}
+
+static inline void __mutex_clear_flag(struct mutex *lock, unsigned long flag)
+{
+       atomic_long_andnot(flag, &lock->owner);
+}
+
 #ifndef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * We split the mutex lock/unlock logic into separate fastpath and
@@ -69,7 +124,7 @@ EXPORT_SYMBOL(__mutex_init);
  * We also put the fastpath first in the kernel image, to make sure the
  * branch is predicted by the CPU as default-untaken.
  */
-__visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
+static void __sched __mutex_lock_slowpath(struct mutex *lock);
 
 /**
  * mutex_lock - acquire the mutex
@@ -95,14 +150,10 @@ __visible void __sched __mutex_lock_slow
 void __sched mutex_lock(struct mutex *lock)
 {
        might_sleep();
-       /*
-        * The locking fastpath is the 1->0 transition from
-        * 'unlocked' into 'locked' state.
-        */
-       __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
-       mutex_set_owner(lock);
-}
 
+       if (!__mutex_trylock_fast(lock))
+               __mutex_lock_slowpath(lock);
+}
 EXPORT_SYMBOL(mutex_lock);
 #endif
 
@@ -176,7 +227,7 @@ ww_mutex_set_context_fastpath(struct ww_
        /*
         * Check if lock is contended, if not there is nobody to wake up
         */
-       if (likely(atomic_read(&lock->base.count) == 0))
+       if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS)))
                return;
 
        /*
@@ -227,7 +278,7 @@ bool mutex_spin_on_owner(struct mutex *l
        bool ret = true;
 
        rcu_read_lock();
-       while (lock->owner == owner) {
+       while (__mutex_owner(lock) == owner) {
                /*
                 * Ensure we emit the owner->on_cpu, dereference _after_
                 * checking lock->owner still matches owner. If that fails,
@@ -260,7 +311,7 @@ static inline int mutex_can_spin_on_owne
                return 0;
 
        rcu_read_lock();
-       owner = READ_ONCE(lock->owner);
+       owner = __mutex_owner(lock);
        if (owner)
                retval = owner->on_cpu;
        rcu_read_unlock();
@@ -272,15 +323,6 @@ static inline int mutex_can_spin_on_owne
 }
 
 /*
- * Atomically try to take the lock when it is available
- */
-static inline bool mutex_try_to_acquire(struct mutex *lock)
-{
-       return !mutex_is_locked(lock) &&
-               (atomic_cmpxchg_acquire(&lock->count, 1, 0) == 1);
-}
-
-/*
  * Optimistic spinning.
  *
  * We try to spin for acquisition when we find that the lock owner
@@ -342,12 +384,12 @@ static bool mutex_optimistic_spin(struct
                 * If there's an owner, wait for it to either
                 * release the lock or go to sleep.
                 */
-               owner = READ_ONCE(lock->owner);
+               owner = __mutex_owner(lock);
                if (owner && !mutex_spin_on_owner(lock, owner))
                        break;
 
                /* Try to acquire the mutex if it is unlocked. */
-               if (mutex_try_to_acquire(lock)) {
+               if (__mutex_trylock(lock)) {
                        lock_acquired(&lock->dep_map, ip);
 
                        if (use_ww_ctx) {
@@ -357,7 +399,6 @@ static bool mutex_optimistic_spin(struct
                                ww_mutex_set_context_fastpath(ww, ww_ctx);
                        }
 
-                       mutex_set_owner(lock);
                        osq_unlock(&lock->osq);
                        return true;
                }
@@ -406,8 +447,7 @@ static bool mutex_optimistic_spin(struct
 }
 #endif
 
-__visible __used noinline
-void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
+static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock);
 
 /**
  * mutex_unlock - release the mutex
@@ -422,21 +462,16 @@ void __sched __mutex_unlock_slowpath(ato
  */
 void __sched mutex_unlock(struct mutex *lock)
 {
-       /*
-        * The unlocking fastpath is the 0->1 transition from 'locked'
-        * into 'unlocked' state:
-        */
-#ifndef CONFIG_DEBUG_MUTEXES
-       /*
-        * When debugging is enabled we must not clear the owner before time,
-        * the slow path will always be taken, and that clears the owner field
-        * after verifying that it was indeed current.
-        */
-       mutex_clear_owner(lock);
+       unsigned long owner;
+
+#ifdef CONFIG_DEBUG_MUTEXES
+       DEBUG_LOCKS_WARN_ON(__mutex_owner(lock) != current);
 #endif
-       __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
-}
 
+       owner = atomic_long_fetch_and(MUTEX_FLAGS, &lock->owner);
+       if (__owner_flags(owner))
+               __mutex_unlock_slowpath(lock);
+}
 EXPORT_SYMBOL(mutex_unlock);
 
 /**
@@ -465,15 +500,7 @@ void __sched ww_mutex_unlock(struct ww_m
                lock->ctx = NULL;
        }
 
-#ifndef CONFIG_DEBUG_MUTEXES
-       /*
-        * When debugging is enabled we must not clear the owner before time,
-        * the slow path will always be taken, and that clears the owner field
-        * after verifying that it was indeed current.
-        */
-       mutex_clear_owner(&lock->base);
-#endif
-       __mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath);
+       mutex_unlock(&lock->base);
 }
 EXPORT_SYMBOL(ww_mutex_unlock);
 
@@ -520,7 +547,7 @@ __mutex_lock_common(struct mutex *lock,
        preempt_disable();
        mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
-       if (mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) {
+       if (__mutex_trylock(lock) || mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) {
                /* got the lock, yay! */
                preempt_enable();
                return 0;
@@ -529,11 +556,9 @@ __mutex_lock_common(struct mutex *lock,
        spin_lock_mutex(&lock->wait_lock, flags);
 
        /*
-        * Once more, try to acquire the lock. Only try-lock the mutex if
-        * it is unlocked to reduce unnecessary xchg() operations.
+        * Once more, try to acquire the lock.
         */
-       if (!mutex_is_locked(lock) &&
-           (atomic_xchg_acquire(&lock->count, 0) == 1))
+       if (__mutex_trylock(lock))
                goto skip_wait;
 
        debug_mutex_lock_common(lock, &waiter);
@@ -543,21 +568,20 @@ __mutex_lock_common(struct mutex *lock,
        list_add_tail(&waiter.list, &lock->wait_list);
        waiter.task = task;
 
+       if (list_first_entry(&lock->wait_list, struct mutex_waiter, list) == &waiter) {
+               __mutex_set_flag(lock, MUTEX_FLAG_WAITERS);
+               /*
+                * We must be sure to set WAITERS before attempting the trylock
+                * below, such that mutex_unlock() must either see our WAITERS
+                * or we see its unlock.
+                */
+               smp_mb__after_atomic();
+       }
+
        lock_contended(&lock->dep_map, ip);
 
        for (;;) {
-               /*
-                * Lets try to take the lock again - this is needed even if
-                * we get here for the first time (shortly after failing to
-                * acquire the lock), to make sure that we get a wakeup once
-                * it's unlocked. Later on, if we sleep, this is the
-                * operation that gives us the lock. We xchg it to -1, so
-                * that when we release the lock, we properly wake up the
-                * other waiters. We only attempt the xchg if the count is
-                * non-negative in order to avoid unnecessary xchg operations:
-                */
-               if (atomic_read(&lock->count) >= 0 &&
-                   (atomic_xchg_acquire(&lock->count, -1) == 1))
+               if (__mutex_trylock(lock))
                        break;
 
                /*
@@ -587,13 +611,13 @@ __mutex_lock_common(struct mutex *lock,
        mutex_remove_waiter(lock, &waiter, task);
        /* set it to 0 if there are no waiters left: */
        if (likely(list_empty(&lock->wait_list)))
-               atomic_set(&lock->count, 0);
+               __mutex_clear_flag(lock, MUTEX_FLAG_WAITERS);
+
        debug_mutex_free_waiter(&waiter);
 
 skip_wait:
        /* got the lock - cleanup and rejoice! */
        lock_acquired(&lock->dep_map, ip);
-       mutex_set_owner(lock);
 
        if (use_ww_ctx) {
                struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
@@ -631,7 +655,6 @@ _mutex_lock_nest_lock(struct mutex *lock
        __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
                            0, nest, _RET_IP_, NULL, 0);
 }
-
 EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
 
 int __sched
@@ -650,7 +673,6 @@ mutex_lock_interruptible_nested(struct m
        return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
                                   subclass, NULL, _RET_IP_, NULL, 0);
 }
-
 EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
 
 static inline int
@@ -715,29 +737,13 @@ EXPORT_SYMBOL_GPL(__ww_mutex_lock_interr
 /*
  * Release the lock, slowpath:
  */
-static inline void
-__mutex_unlock_common_slowpath(struct mutex *lock, int nested)
+static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock)
 {
        unsigned long flags;
        WAKE_Q(wake_q);
 
-       /*
-        * As a performance measurement, release the lock before doing other
-        * wakeup related duties to follow. This allows other tasks to acquire
-        * the lock sooner, while still handling cleanups in past unlock calls.
-        * This can be done as we do not enforce strict equivalence between the
-        * mutex counter and wait_list.
-        *
-        *
-        * Some architectures leave the lock unlocked in the fastpath failure
-        * case, others need to leave it locked. In the later case we have to
-        * unlock it here - as the lock counter is currently 0 or negative.
-        */
-       if (__mutex_slowpath_needs_to_unlock())
-               atomic_set(&lock->count, 1);
-
        spin_lock_mutex(&lock->wait_lock, flags);
-       mutex_release(&lock->dep_map, nested, _RET_IP_);
+       mutex_release(&lock->dep_map, 0, _RET_IP_);
        debug_mutex_unlock(lock);
 
        if (!list_empty(&lock->wait_list)) {
@@ -754,17 +760,6 @@ __mutex_unlock_common_slowpath(struct mu
        wake_up_q(&wake_q);
 }
 
-/*
- * Release the lock, slowpath:
- */
-__visible void
-__mutex_unlock_slowpath(atomic_t *lock_count)
-{
-       struct mutex *lock = container_of(lock_count, struct mutex, count);
-
-       __mutex_unlock_common_slowpath(lock, 1);
-}
-
 #ifndef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * Here come the less common (and hence less performance-critical) APIs:
@@ -789,38 +784,29 @@ __mutex_lock_interruptible_slowpath(stru
  */
 int __sched mutex_lock_interruptible(struct mutex *lock)
 {
-       int ret;
-
        might_sleep();
-       ret =  __mutex_fastpath_lock_retval(&lock->count);
-       if (likely(!ret)) {
-               mutex_set_owner(lock);
+
+       if (__mutex_trylock_fast(lock))
                return 0;
-       } else
-               return __mutex_lock_interruptible_slowpath(lock);
+
+       return __mutex_lock_interruptible_slowpath(lock);
 }
 
 EXPORT_SYMBOL(mutex_lock_interruptible);
 
 int __sched mutex_lock_killable(struct mutex *lock)
 {
-       int ret;
-
        might_sleep();
-       ret = __mutex_fastpath_lock_retval(&lock->count);
-       if (likely(!ret)) {
-               mutex_set_owner(lock);
+
+       if (__mutex_trylock_fast(lock))
                return 0;
-       } else
-               return __mutex_lock_killable_slowpath(lock);
+
+       return __mutex_lock_killable_slowpath(lock);
 }
 EXPORT_SYMBOL(mutex_lock_killable);
 
-__visible void __sched
-__mutex_lock_slowpath(atomic_t *lock_count)
+static void __sched __mutex_lock_slowpath(struct mutex *lock)
 {
-       struct mutex *lock = container_of(lock_count, struct mutex, count);
-
        __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0,
                            NULL, _RET_IP_, NULL, 0);
 }
@@ -856,37 +842,6 @@ __ww_mutex_lock_interruptible_slowpath(s
 
 #endif
 
-/*
- * Spinlock based trylock, we take the spinlock and check whether we
- * can get the lock:
- */
-static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
-{
-       struct mutex *lock = container_of(lock_count, struct mutex, count);
-       unsigned long flags;
-       int prev;
-
-       /* No need to trylock if the mutex is locked. */
-       if (mutex_is_locked(lock))
-               return 0;
-
-       spin_lock_mutex(&lock->wait_lock, flags);
-
-       prev = atomic_xchg_acquire(&lock->count, -1);
-       if (likely(prev == 1)) {
-               mutex_set_owner(lock);
-               mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
-       }
-
-       /* Set it back to 0 if there are no waiters: */
-       if (likely(list_empty(&lock->wait_list)))
-               atomic_set(&lock->count, 0);
-
-       spin_unlock_mutex(&lock->wait_lock, flags);
-
-       return prev == 1;
-}
-
 /**
  * mutex_trylock - try to acquire the mutex, without waiting
  * @lock: the mutex to be acquired
@@ -903,13 +858,7 @@ static inline int __mutex_trylock_slowpa
  */
 int __sched mutex_trylock(struct mutex *lock)
 {
-       int ret;
-
-       ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath);
-       if (ret)
-               mutex_set_owner(lock);
-
-       return ret;
+       return __mutex_trylock(lock);
 }
 EXPORT_SYMBOL(mutex_trylock);
 
@@ -917,36 +866,28 @@ EXPORT_SYMBOL(mutex_trylock);
 int __sched
 __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 {
-       int ret;
-
        might_sleep();
 
-       ret = __mutex_fastpath_lock_retval(&lock->base.count);
-
-       if (likely(!ret)) {
+       if (__mutex_trylock_fast(&lock->base)) {
                ww_mutex_set_context_fastpath(lock, ctx);
-               mutex_set_owner(&lock->base);
-       } else
-               ret = __ww_mutex_lock_slowpath(lock, ctx);
-       return ret;
+               return 0;
+       }
+
+       return __ww_mutex_lock_slowpath(lock, ctx);
 }
 EXPORT_SYMBOL(__ww_mutex_lock);
 
 int __sched
 __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 {
-       int ret;
-
        might_sleep();
 
-       ret = __mutex_fastpath_lock_retval(&lock->base.count);
-
-       if (likely(!ret)) {
+       if (__mutex_trylock_fast(&lock->base)) {
                ww_mutex_set_context_fastpath(lock, ctx);
-               mutex_set_owner(&lock->base);
-       } else
-               ret = __ww_mutex_lock_interruptible_slowpath(lock, ctx);
-       return ret;
+               return 0;
+       }
+
+       return __ww_mutex_lock_interruptible_slowpath(lock, ctx);
 }
 EXPORT_SYMBOL(__ww_mutex_lock_interruptible);
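
(Aside, not part of the diff: the helpers used throughout the mutex.c hunks
above -- __mutex_trylock(), __mutex_owner(), __owner_flags(), __mutex_set_flag()
and __mutex_clear_flag() -- are introduced by earlier hunks of this patch that
are not quoted in this excerpt. The sketch below only shows what such an
encoding could look like, assuming the owner word packs the task_struct pointer
together with flag bits in its alignment-free low bits; the flag values and
loop details are illustrative, not the patch's authoritative definitions.)

/* Illustrative sketch only -- see the real definitions earlier in the patch. */
#include <linux/atomic.h>
#include <linux/sched.h>

#define MUTEX_FLAG_WAITERS      0x01    /* illustrative value */
#define MUTEX_FLAGS             0x03    /* mask of all flag bits */

static inline struct task_struct *__mutex_owner(struct mutex *lock)
{
        return (struct task_struct *)
                (atomic_long_read(&lock->owner) & ~MUTEX_FLAGS);
}

static inline unsigned long __owner_flags(unsigned long owner)
{
        return owner & MUTEX_FLAGS;
}

/* Acquire the lock iff unowned, preserving any flag bits already set. */
static inline bool __mutex_trylock(struct mutex *lock)
{
        unsigned long owner, old, curr = (unsigned long)current;

        owner = atomic_long_read(&lock->owner);
        for (;;) {
                if (owner & ~MUTEX_FLAGS)       /* already owned */
                        return false;

                old = atomic_long_cmpxchg_acquire(&lock->owner, owner,
                                                  curr | __owner_flags(owner));
                if (old == owner)
                        return true;
                owner = old;    /* lost a race against a flag update, retry */
        }
}

static inline void __mutex_set_flag(struct mutex *lock, unsigned long flag)
{
        atomic_long_or(flag, &lock->owner);
}

static inline void __mutex_clear_flag(struct mutex *lock, unsigned long flag)
{
        atomic_long_andnot(flag, &lock->owner);
}

Keeping the owner and the flags in a single word is what lets mutex_unlock()
clear the owner and test for waiters with one atomic_long_fetch_and(), as in
the hunk above.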
 
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -16,32 +16,6 @@
 #define mutex_remove_waiter(lock, waiter, task) \
                __list_del((waiter)->list.prev, (waiter)->list.next)
 
-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-/*
- * The mutex owner can get read and written to locklessly.
- * We should use WRITE_ONCE when writing the owner value to
- * avoid store tearing, otherwise, a thread could potentially
- * read a partially written and incomplete owner value.
- */
-static inline void mutex_set_owner(struct mutex *lock)
-{
-       WRITE_ONCE(lock->owner, current);
-}
-
-static inline void mutex_clear_owner(struct mutex *lock)
-{
-       WRITE_ONCE(lock->owner, NULL);
-}
-#else
-static inline void mutex_set_owner(struct mutex *lock)
-{
-}
-
-static inline void mutex_clear_owner(struct mutex *lock)
-{
-}
-#endif
-
 #define debug_mutex_wake_waiter(lock, waiter)          do { } while (0)
 #define debug_mutex_free_waiter(waiter)                        do { } while (0)
 #define debug_mutex_add_waiter(lock, waiter, ti)       do { } while (0)
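
(Aside, not part of the diff: with the owner and the lock state folded into one
word, the removed mutex_set_owner()/mutex_clear_owner() helpers need no
replacement -- the owner is written by the same atomic operation that acquires
or releases the lock. Purely to illustrate the bit-packing, here is a
standalone C11/userspace analogue, not kernel code; the flag values and the
fake task pointer are made up.)

/* Standalone userspace illustration only -- not part of the patch. */
#include <stdatomic.h>
#include <stdio.h>

#define MUTEX_FLAG_WAITERS      0x01UL  /* illustrative value */
#define MUTEX_FLAGS             0x03UL  /* mask of all flag bits */

int main(void)
{
        /* stands in for mutex::owner; a real task_struct * is well aligned */
        atomic_ulong owner = ATOMIC_VAR_INIT(0);
        unsigned long task = 0xffffff8012345600UL;      /* fake task pointer */
        unsigned long old;

        atomic_store(&owner, task);                     /* lock fastpath     */
        atomic_fetch_or(&owner, MUTEX_FLAG_WAITERS);    /* contender arrives */

        /* unlock fastpath: clear the owner bits, keep the flag bits */
        old = atomic_fetch_and(&owner, MUTEX_FLAGS);

        printf("old owner %#lx, flags %#lx, lock word now %#lx\n",
               old & ~MUTEX_FLAGS, old & MUTEX_FLAGS, atomic_load(&owner));
        printf("slowpath needed: %s\n", (old & MUTEX_FLAGS) ? "yes" : "no");
        return 0;
}

Run, this prints a non-zero flag value for the old word, which is exactly the
condition under which mutex_unlock() above punts to __mutex_unlock_slowpath().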
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -75,11 +75,11 @@
 #include <linux/compiler.h>
 #include <linux/frame.h>
 #include <linux/prefetch.h>
+#include <linux/mutex.h>
 
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
-#include <asm/mutex.h>
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #endif
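
(Aside, not part of the diff: the smp_mb__after_atomic() added to the wait loop
pairs with the unlocker's atomic_long_fetch_and(): either the unlocker observes
MUTEX_FLAG_WAITERS and takes the slowpath to issue a wakeup, or the waiter's
subsequent trylock observes the cleared owner and takes the lock. A condensed,
non-buildable sketch of the two sides -- names from the patch, everything else
simplified away:)

/* Condensed sketch; error handling, ww bits and the sleep loop are omitted. */
static void waiter_side(struct mutex *lock)
{
        /* first waiter on the list: */
        __mutex_set_flag(lock, MUTEX_FLAG_WAITERS);     /* atomic RMW         */
        smp_mb__after_atomic();         /* order the flag vs. trylock below   */

        if (__mutex_trylock(lock))
                return;                 /* raced with the unlock and won      */

        /* otherwise sleep: the unlock's RMW on the same word sees WAITERS    */
}

static void unlocker_side(struct mutex *lock)
{
        unsigned long owner;

        /* clear the owner bits, keep the flag bits */
        owner = atomic_long_fetch_and(MUTEX_FLAGS, &lock->owner);
        if (__owner_flags(owner))               /* WAITERS was set            */
                __mutex_unlock_slowpath(lock);  /* wake the sleeper           */
}

Because the flags and the owner share one word, both sides race on a single
atomic variable, which makes it easy to see that no wakeup can be lost.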

