 arch/x86/Kconfig                |   1 +
 arch/x86/include/asm/spinlock.h |   5 ++
 fs/dcache.c                     |  15 +++++
 fs/namei.c                      |  18 ++++--
 include/linux/lockref.h         |  59 ++++---------------
 lib/Kconfig                     |  10 ++++
 lib/Makefile                    |   1 +
 lib/lockref.c                   | 127 ++++++++++++++++++++++++++++++++++++++++
 8 files changed, 183 insertions(+), 53 deletions(-)
 create mode 100644 lib/lockref.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b32ebf92b0ce..67e00740531c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -16,6 +16,7 @@ config X86_64
 	def_bool y
 	depends on 64BIT
 	select X86_DEV_DMA_OPS
+	select ARCH_USE_CMPXCHG_LOCKREF
 
 ### Arch settings
 config X86
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index e3ddd7db723f..e0e668422c75 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -34,6 +34,11 @@
 # define UNLOCK_LOCK_PREFIX
 #endif
 
+static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return lock.tickets.head == lock.tickets.tail;
+}
+
 /*
  * Ticket locks are conceptually two parts, one indicating the current head of
  * the queue, and the other indicating the current tail. The lock is acquired
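
Unlike arch_spin_is_locked(), this new helper takes a lock *value* rather
than a pointer: lib/lockref.c (added later in this patch) snapshots the
whole 8-byte {lock, count} word once and then needs to test the embedded
lock state without touching memory again. Roughly how the snapshot gets
tested, following the CMPXCHG_LOOP code below:

	struct lockref old;

	/* one plain 64-bit load, no atomics, no stores */
	old.lock_count = ACCESS_ONCE(lockref->lock_count);
	if (arch_spin_value_unlocked(old.lock.rlock.raw_lock)) {
		/* lock was free in the snapshot: safe to try updating
		 * the count with cmpxchg, never taking the lock */
	}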
diff --git a/fs/dcache.c b/fs/dcache.c
index b949af850cd6..2d244227999d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -611,8 +611,23 @@ static inline void __dget(struct dentry *dentry)
 
 struct dentry *dget_parent(struct dentry *dentry)
 {
+	int gotref;
 	struct dentry *ret;
 
+	/*
+	 * Do optimistic parent lookup without any
+	 * locking.
+	 */
+	rcu_read_lock();
+	ret = ACCESS_ONCE(dentry->d_parent);
+	gotref = lockref_get_not_zero(&ret->d_lockref);
+	rcu_read_unlock();
+	if (likely(gotref)) {
+		if (likely(ret == ACCESS_ONCE(dentry->d_parent)))
+			return ret;
+		dput(ret);
+	}
+
 repeat:
 	/*
 	 * Don't need rcu_dereference because we re-check it was correct under
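
This fast path is the whole point of the new lockref_get_not_zero():
sample the parent pointer with no locks held, try to bump its count, then
confirm the pointer did not change in the meantime. RCU keeps the dentry
memory valid for the duration, so the lockref operation is safe even if
the snapshot turns out to be stale. The same pattern reduced to its
essentials (a sketch with hypothetical node/node_put names, not from this
patch):

	rcu_read_lock();
	parent = ACCESS_ONCE(node->parent);	/* unlocked snapshot */
	got = lockref_get_not_zero(&parent->ref);
	rcu_read_unlock();

	if (got) {
		if (parent == ACCESS_ONCE(node->parent))
			return parent;	/* ref taken, pointer still current */
		node_put(parent);	/* raced with reparenting: undo */
	}
	/* fall through to the locked slow path */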
diff --git a/fs/namei.c b/fs/namei.c
index 7720fbd5277b..2dfa2e3136c0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -582,17 +582,25 @@ static int complete_walk(struct nameidata *nd)
 	int status;
 
 	if (nd->flags & LOOKUP_RCU) {
+		int gotref;
+
 		nd->flags &= ~LOOKUP_RCU;
 		if (!(nd->flags & LOOKUP_ROOT))
 			nd->root.mnt = NULL;
-		spin_lock(&dentry->d_lock);
-		if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
-			spin_unlock(&dentry->d_lock);
+
+		gotref = lockref_get_or_lock(&dentry->d_lockref);
+		if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
+			if (gotref)
+				dput(dentry);
+			else
+				spin_unlock(&dentry->d_lock);
 			unlock_rcu_walk();
 			return -ECHILD;
 		}
-		BUG_ON(nd->inode != dentry->d_inode);
-		spin_unlock(&dentry->d_lock);
+		if (!gotref) {
+			dentry->d_lockref.count++;
+			spin_unlock(&dentry->d_lock);
+		}
 		mntget(nd->path.mnt);
 		unlock_rcu_walk();
 	}
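
The calling convention being relied on here: lockref_get_or_lock() either
takes a reference locklessly and returns 1, or returns 0 with the count
still zero and the spinlock now held, so the caller decides what a zero
count means. A sketch of that contract with a hypothetical object (not
from this patch):

	if (lockref_get_or_lock(&obj->ref)) {
		/* count was nonzero and has been incremented; no lock held */
	} else {
		/* count was zero; we now hold obj->ref.lock */
		obj->ref.count++;		/* e.g. resurrect it by hand... */
		spin_unlock(&obj->ref.lock);	/* ...as complete_walk() does above */
	}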
diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index 01233e01627a..5bb6c6700dd5 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -17,55 +17,18 @@
 #include <linux/spinlock.h>
 
 struct lockref {
-	spinlock_t lock;
-	unsigned int count;
+	union {
+		aligned_u64 lock_count;
+		struct {
+			spinlock_t lock;
+			unsigned int count;
+		};
+	};
 };
 
-/**
- * lockref_get - Increments reference count unconditionally
- * @lockcnt: pointer to lockref structure
- *
- * This operation is only valid if you already hold a reference
- * to the object, so you know the count cannot be zero.
- */
-static inline void lockref_get(struct lockref *lockref)
-{
-	spin_lock(&lockref->lock);
-	lockref->count++;
-	spin_unlock(&lockref->lock);
-}
-
-/**
- * lockref_get_not_zero - Increments count unless the count is 0
- * @lockcnt: pointer to lockref structure
- * Return: 1 if count updated successfully or 0 if count is 0
- */
-static inline int lockref_get_not_zero(struct lockref *lockref)
-{
-	int retval = 0;
-
-	spin_lock(&lockref->lock);
-	if (lockref->count) {
-		lockref->count++;
-		retval = 1;
-	}
-	spin_unlock(&lockref->lock);
-	return retval;
-}
-
-/**
- * lockref_put_or_lock - decrements count unless count <= 1 before decrement
- * @lockcnt: pointer to lockref structure
- * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
- */
-static inline int lockref_put_or_lock(struct lockref *lockref)
-{
-	spin_lock(&lockref->lock);
-	if (lockref->count <= 1)
-		return 0;
-	lockref->count--;
-	spin_unlock(&lockref->lock);
-	return 1;
-}
+extern void lockref_get(struct lockref *);
+extern int lockref_get_not_zero(struct lockref *);
+extern int lockref_get_or_lock(struct lockref *);
+extern int lockref_put_or_lock(struct lockref *);
 
 #endif /* __LINUX_LOCKREF_H */
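
With the inline definitions gone, these four functions are the entire API,
and the union layout above is what makes the lockless versions possible:
on 64-bit, the spinlock and the count share one naturally aligned 8-byte
word that cmpxchg can update atomically. A toy get/put pairing
(hypothetical struct foo, purely illustrative):

	struct foo {
		struct lockref ref;
		/* payload... */
	};

	static void foo_get(struct foo *f)
	{
		/* only valid while we already hold a reference */
		lockref_get(&f->ref);
	}

	static void foo_put(struct foo *f)
	{
		if (lockref_put_or_lock(&f->ref))
			return;			/* count was > 1: dropped locklessly */
		/* count was <= 1 and we now hold f->ref.lock */
		f->ref.count--;
		spin_unlock(&f->ref.lock);
		kfree(f);			/* last reference is gone */
	}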
diff --git a/lib/Kconfig b/lib/Kconfig
index 71d9f81f6eed..65561716c16c 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -48,6 +48,16 @@ config STMP_DEVICE
 config PERCPU_RWSEM
 	boolean
 
+config ARCH_USE_CMPXCHG_LOCKREF
+	bool
+
+config CMPXCHG_LOCKREF
+	def_bool y if ARCH_USE_CMPXCHG_LOCKREF
+	depends on SMP
+	depends on !GENERIC_LOCKBREAK
+	depends on !DEBUG_SPINLOCK
+	depends on !DEBUG_LOCK_ALLOC
+
 config CRC_CCITT
 	tristate "CRC-CCITT functions"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index 7baccfd8a4e9..f2cb3082697c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -20,6 +20,7 @@ lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
 
 lib-y	+= kobject.o klist.o
+obj-y	+= lockref.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
diff --git a/lib/lockref.c b/lib/lockref.c
new file mode 100644
index 000000000000..7819c2d1d315
--- /dev/null
+++ b/lib/lockref.c
@@ -0,0 +1,127 @@
+#include <linux/export.h>
+#include <linux/lockref.h>
+
+#ifdef CONFIG_CMPXCHG_LOCKREF
+
+/*
+ * Note that the "cmpxchg()" reloads the "old" value for the
+ * failure case.
+ */
+#define CMPXCHG_LOOP(CODE, SUCCESS) do {					\
+	struct lockref old;							\
+	BUILD_BUG_ON(sizeof(old) != 8);						\
+	old.lock_count = ACCESS_ONCE(lockref->lock_count);			\
+	while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {  	\
+		struct lockref new = old, prev = old;				\
+		CODE								\
+		old.lock_count = cmpxchg(&lockref->lock_count,			\
+					 old.lock_count, new.lock_count);	\
+		if (likely(old.lock_count == prev.lock_count)) {		\
+			SUCCESS;						\
+		}								\
+	}									\
+} while (0)
+
+#else
+
+#define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0)
+
+#endif
+
+/**
+ * lockref_get - Increments reference count unconditionally
+ * @lockref: pointer to lockref structure
+ *
+ * This operation is only valid if you already hold a reference
+ * to the object, so you know the count cannot be zero.
+ */
+void lockref_get(struct lockref *lockref)
+{
+	CMPXCHG_LOOP(
+		new.count++;
+	,
+		return;
+	);
+
+	spin_lock(&lockref->lock);
+	lockref->count++;
+	spin_unlock(&lockref->lock);
+}
+EXPORT_SYMBOL(lockref_get);
+
+/**
+ * lockref_get_not_zero - Increments count unless the count is 0
+ * @lockref: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count was zero
+ */
+int lockref_get_not_zero(struct lockref *lockref)
+{
+	int retval;
+
+	CMPXCHG_LOOP(
+		new.count++;
+		if (!old.count)
+			return 0;
+	,
+		return 1;
+	);
+
+	spin_lock(&lockref->lock);
+	retval = 0;
+	if (lockref->count) {
+		lockref->count++;
+		retval = 1;
+	}
+	spin_unlock(&lockref->lock);
+	return retval;
+}
+EXPORT_SYMBOL(lockref_get_not_zero);
+
+/**
+ * lockref_get_or_lock - Increments count unless the count is 0
+ * @lockref: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count was zero
+ * and we got the lock instead.
+ */
+int lockref_get_or_lock(struct lockref *lockref)
+{
+	CMPXCHG_LOOP(
+		new.count++;
+		if (!old.count)
+			break;
+	,
+		return 1;
+	);
+
+	spin_lock(&lockref->lock);
+	if (!lockref->count)
+		return 0;
+	lockref->count++;
+	spin_unlock(&lockref->lock);
+	return 1;
+}
+EXPORT_SYMBOL(lockref_get_or_lock);
+
+/**
+ * lockref_put_or_lock - decrements count unless count <= 1 before decrement
+ * @lockref: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken
+ */
+int lockref_put_or_lock(struct lockref *lockref)
+{
+	CMPXCHG_LOOP(
+		new.count--;
+		if (old.count <= 1)
+			break;
+	,
+		return 1;
+	);
+
+	spin_lock(&lockref->lock);
+	if (lockref->count <= 1)
+		return 0;
+	lockref->count--;
+	spin_unlock(&lockref->lock);
+	return 1;
+}
+EXPORT_SYMBOL(lockref_put_or_lock);

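For reference outside a kernel tree, the heart of CMPXCHG_LOOP is just an
8-byte compare-and-swap over a {lock, count} pair. A stand-alone user-space
analogue of lockref_get_not_zero() (C11 atomics instead of the kernel's
cmpxchg(), and a bare word instead of a real spinlock; illustrative only,
builds with gcc -std=c11):

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Layout analogue of struct lockref: lock and count in one word. */
	union pair {
		uint64_t lock_count;
		struct {
			uint32_t lock;	/* 0 == unlocked; spinlock stand-in */
			uint32_t count;
		};
	};

	static _Atomic uint64_t word;	/* the shared {lock, count} word */

	static int get_not_zero(void)
	{
		union pair old, new;

		old.lock_count = atomic_load(&word);
		while (old.lock == 0) {	/* only try while the lock looks free */
			if (old.count == 0)
				return 0;
			new = old;
			new.count++;
			/* on failure C11 stores the fresh value back into "old",
			 * mirroring how cmpxchg() reloads it in CMPXCHG_LOOP */
			if (atomic_compare_exchange_weak(&word, &old.lock_count,
							 new.lock_count))
				return 1;
		}
		return 0;	/* lock was held: a real caller falls back to locking */
	}

	int main(void)
	{
		union pair init = { .lock = 0, .count = 1 };

		atomic_store(&word, init.lock_count);
		printf("got=%d\n", get_not_zero());	/* prints got=1 */
		return 0;
	}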