[PATCH 4.9 015/107] proc/sysctl: prune stale dentries during unregistering

2018-08-14 Thread Greg Kroah-Hartman
4.9-stable review patch.  If anyone has any objections, please let me know.

--

From: Konstantin Khlebnikov 

commit d6cffbbe9a7e51eb705182965a189457c17ba8a3 upstream.

Currently, unregistering a sysctl table does not prune its dentries.
Stale dentries can slow down sysctl operations significantly.

For example, command:

 # for i in {1..10} ; do unshare -n -- sysctl -a &> /dev/null ; done
 creates millions of stale dentries around the sysctls of the loopback interface:

 # sysctl fs.dentry-state
 fs.dentry-state = 25812579  24724135  45  0   0   0

 All of them have matching names, thus lookup has to scan through the whole
 hash chain and call d_compare (proc_sys_compare), which checks them
 under a system-wide spinlock (sysctl_lock).

 # time sysctl -a > /dev/null
 real  1m12.806s
 user  0m0.016s
 sys   1m12.400s

Currently only the memory reclaimer can remove this garbage,
but without significant memory pressure this never happens.

This patch collects sysctl inodes into a list on the sysctl table header and
prunes all their dentries once that table is unregistered.

Konstantin Khlebnikov  writes:
> On 10.02.2017 10:47, Al Viro wrote:
>> how about >> the matching stats *after* that patch?
>
> dcache size doesn't grow endlessly, so stats are fine
>
> # sysctl fs.dentry-state
> fs.dentry-state = 92712   58376   45  0   0   0
>
> # time sysctl -a &>/dev/null
>
> real  0m0.013s
> user  0m0.004s
> sys   0m0.008s

Signed-off-by: Konstantin Khlebnikov 
Suggested-by: Al Viro 
Signed-off-by: Eric W. Biederman 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/proc/inode.c|3 +-
 fs/proc/internal.h |7 -
 fs/proc/proc_sysctl.c  |   59 +++--
 include/linux/sysctl.h |1 
 4 files changed, 51 insertions(+), 19 deletions(-)

--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -43,10 +43,11 @@ static void proc_evict_inode(struct inod
de = PDE(inode);
if (de)
pde_put(de);
+
head = PROC_I(inode)->sysctl;
if (head) {
RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
-   sysctl_head_put(head);
+   proc_sys_evict_inode(inode, head);
}
 }
 
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -65,6 +65,7 @@ struct proc_inode {
struct proc_dir_entry *pde;
struct ctl_table_header *sysctl;
struct ctl_table *sysctl_entry;
+   struct list_head sysctl_inodes;
const struct proc_ns_operations *ns_ops;
struct inode vfs_inode;
 };
@@ -249,10 +250,12 @@ extern void proc_thread_self_init(void);
  */
 #ifdef CONFIG_PROC_SYSCTL
 extern int proc_sys_init(void);
-extern void sysctl_head_put(struct ctl_table_header *);
+extern void proc_sys_evict_inode(struct inode *inode,
+struct ctl_table_header *head);
 #else
 static inline void proc_sys_init(void) { }
-static inline void sysctl_head_put(struct ctl_table_header *head) { }
+static inline void proc_sys_evict_inode(struct  inode *inode,
+   struct ctl_table_header *head) { }
 #endif
 
 /*
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -190,6 +190,7 @@ static void init_header(struct ctl_table
head->set = set;
head->parent = NULL;
head->node = node;
+   INIT_LIST_HEAD(&head->inodes);
if (node) {
struct ctl_table *entry;
for (entry = table; entry->procname; entry++, node++)
@@ -259,6 +260,29 @@ static void unuse_table(struct ctl_table
complete(p->unregistering);
 }
 
+/* called under sysctl_lock */
+static void proc_sys_prune_dcache(struct ctl_table_header *head)
+{
+   struct inode *inode, *prev = NULL;
+   struct proc_inode *ei;
+
+   list_for_each_entry(ei, &head->inodes, sysctl_inodes) {
+   inode = igrab(&ei->vfs_inode);
+   if (inode) {
+   spin_unlock(&sysctl_lock);
+   iput(prev);
+   prev = inode;
+   d_prune_aliases(inode);
+   spin_lock(&sysctl_lock);
+   }
+   }
+   if (prev) {
+   spin_unlock(&sysctl_lock);
+   iput(prev);
+   spin_lock(&sysctl_lock);
+   }
+}
+
 /* called under sysctl_lock, will reacquire if has to wait */
 static void start_unregistering(struct ctl_table_header *p)
 {
@@ -278,27 +302,17 @@ static void start_unregistering(struct c
p->unregistering = ERR_PTR(-EINVAL);
}
/*
+* Prune dentries for unregistered sysctls: namespaced sysctls
+* can have duplicate names and contaminate dcache very badly.
+*/
+   proc_sys_prune_dcache(p);
+   /*
 * do not remove from the list until nobody holds it; walking the
 * list in do_sysctl() relies on that.
 */
erase_header(p);
 }
 
-static void sysctl_head_get(struct ctl_table_header *head)
-{
- 

[PATCH 4.9 015/107] proc/sysctl: prune stale dentries during unregistering

2018-08-14 Thread Greg Kroah-Hartman
4.9-stable review patch.  If anyone has any objections, please let me know.

--

From: Konstantin Khlebnikov 

commit d6cffbbe9a7e51eb705182965a189457c17ba8a3 upstream.

Currently, unregistering a sysctl table does not prune its dentries.
Stale dentries can slow down sysctl operations significantly.

For example, command:

 # for i in {1..10} ; do unshare -n -- sysctl -a &> /dev/null ; done
 creates millions of stale dentries around the sysctls of the loopback interface:

 # sysctl fs.dentry-state
 fs.dentry-state = 25812579  24724135  45  0   0   0

 All of them have matching names, thus lookup has to scan through the whole
 hash chain and call d_compare (proc_sys_compare), which checks them
 under a system-wide spinlock (sysctl_lock).

 # time sysctl -a > /dev/null
 real  1m12.806s
 user  0m0.016s
 sys   1m12.400s

Currently only the memory reclaimer can remove this garbage,
but without significant memory pressure this never happens.

This patch collects sysctl inodes into a list on the sysctl table header and
prunes all their dentries once that table is unregistered.

Konstantin Khlebnikov  writes:
> On 10.02.2017 10:47, Al Viro wrote:
>> how about >> the matching stats *after* that patch?
>
> dcache size doesn't grow endlessly, so stats are fine
>
> # sysctl fs.dentry-state
> fs.dentry-state = 92712   58376   45  0   0   0
>
> # time sysctl -a &>/dev/null
>
> real  0m0.013s
> user  0m0.004s
> sys   0m0.008s

Signed-off-by: Konstantin Khlebnikov 
Suggested-by: Al Viro 
Signed-off-by: Eric W. Biederman 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/proc/inode.c|3 +-
 fs/proc/internal.h |7 -
 fs/proc/proc_sysctl.c  |   59 +++--
 include/linux/sysctl.h |1 
 4 files changed, 51 insertions(+), 19 deletions(-)

--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -43,10 +43,11 @@ static void proc_evict_inode(struct inod
de = PDE(inode);
if (de)
pde_put(de);
+
head = PROC_I(inode)->sysctl;
if (head) {
RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
-   sysctl_head_put(head);
+   proc_sys_evict_inode(inode, head);
}
 }
 
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -65,6 +65,7 @@ struct proc_inode {
struct proc_dir_entry *pde;
struct ctl_table_header *sysctl;
struct ctl_table *sysctl_entry;
+   struct list_head sysctl_inodes;
const struct proc_ns_operations *ns_ops;
struct inode vfs_inode;
 };
@@ -249,10 +250,12 @@ extern void proc_thread_self_init(void);
  */
 #ifdef CONFIG_PROC_SYSCTL
 extern int proc_sys_init(void);
-extern void sysctl_head_put(struct ctl_table_header *);
+extern void proc_sys_evict_inode(struct inode *inode,
+struct ctl_table_header *head);
 #else
 static inline void proc_sys_init(void) { }
-static inline void sysctl_head_put(struct ctl_table_header *head) { }
+static inline void proc_sys_evict_inode(struct  inode *inode,
+   struct ctl_table_header *head) { }
 #endif
 
 /*
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -190,6 +190,7 @@ static void init_header(struct ctl_table
head->set = set;
head->parent = NULL;
head->node = node;
+   INIT_LIST_HEAD(&head->inodes);
if (node) {
struct ctl_table *entry;
for (entry = table; entry->procname; entry++, node++)
@@ -259,6 +260,29 @@ static void unuse_table(struct ctl_table
complete(p->unregistering);
 }
 
+/* called under sysctl_lock */
+static void proc_sys_prune_dcache(struct ctl_table_header *head)
+{
+   struct inode *inode, *prev = NULL;
+   struct proc_inode *ei;
+
+   list_for_each_entry(ei, &head->inodes, sysctl_inodes) {
+   inode = igrab(&ei->vfs_inode);
+   if (inode) {
+   spin_unlock(&sysctl_lock);
+   iput(prev);
+   prev = inode;
+   d_prune_aliases(inode);
+   spin_lock(&sysctl_lock);
+   }
+   }
+   if (prev) {
+   spin_unlock(&sysctl_lock);
+   iput(prev);
+   spin_lock(&sysctl_lock);
+   }
+}
+
 /* called under sysctl_lock, will reacquire if has to wait */
 static void start_unregistering(struct ctl_table_header *p)
 {
@@ -278,27 +302,17 @@ static void start_unregistering(struct c
p->unregistering = ERR_PTR(-EINVAL);
}
/*
+* Prune dentries for unregistered sysctls: namespaced sysctls
+* can have duplicate names and contaminate dcache very badly.
+*/
+   proc_sys_prune_dcache(p);
+   /*
 * do not remove from the list until nobody holds it; walking the
 * list in do_sysctl() relies on that.
 */
erase_header(p);
 }
 
-static void sysctl_head_get(struct ctl_table_header *head)
-{
-