[Devel] [PATCH vz8] fs/overlayfs: Fix crash on overlayfs mount

2021-01-13 Thread Andrey Ryabinin
Kdump kernel fails to load because of crash on mount of overlayfs:

 BUG: unable to handle kernel NULL pointer dereference at 0060

 Call Trace:
  seq_path+0x64/0xb0
  print_paths_option+0x79/0xa0
  ovl_show_options+0x3a/0x320
  show_mountinfo+0x1ee/0x290
  seq_read+0x2f8/0x400
  vfs_read+0x9d/0x150
  ksys_read+0x4f/0xb0
  do_syscall_64+0x5b/0x1a0

This is caused by an out-of-bounds (OOB) access to ofs->lowerpaths.
We pass ofs->numlayer to print_paths_option() as the size of the
->lowerpaths array, but that is not its size. We could probably pass
'ofs->numlayer - 1' as the number of lower layers/paths, but it's better to
remove lowerpaths completely. All the necessary information is already
contained in 'struct ovl_entry'. Use it to print the paths instead.

Fixes: 17fc61697f73 ("overlayfs: add dynamic path resolving in mount options")
Fixes: 2191d729083d ("overlayfs: add mnt_id paths options")

https://jira.sw.ru/browse/PSBM-123508
Signed-off-by: Andrey Ryabinin 
---
 fs/overlayfs/overlayfs.h |  4 ++--
 fs/overlayfs/ovl_entry.h |  1 -
 fs/overlayfs/super.c | 30 ++
 fs/overlayfs/util.c  | 13 +
 4 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 7e103d002819..a708ebbd2e21 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -313,10 +313,10 @@ ssize_t ovl_getxattr(struct dentry *dentry, char *name, 
char **value,
 
 void print_path_option(struct seq_file *m, const char *name, struct path 
*path);
 void print_paths_option(struct seq_file *m, const char *name,
-   struct path *paths, unsigned int num);
+   struct ovl_path *paths, unsigned int num);
 void print_mnt_id_option(struct seq_file *m, const char *name, struct path 
*path);
 void print_mnt_ids_option(struct seq_file *m, const char *name,
-   struct path *paths, unsigned int num);
+   struct ovl_path *paths, unsigned int num);
 
 static inline bool ovl_is_impuredir(struct dentry *dentry)
 {
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index ea1906448ec5..4e7272c7e4dd 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -54,7 +54,6 @@ struct ovl_fs {
unsigned int numlayer;
/* Number of unique fs among layers including upper fs */
unsigned int numfs;
-   struct path *lowerpaths;
const struct ovl_layer *layers;
struct ovl_sb *fs;
/* workbasepath is the path at workdir= mount option */
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 3755f280036f..069d365a609d 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -241,11 +241,6 @@ static void ovl_free_fs(struct ovl_fs *ofs)
ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
mntput(ofs->upper_mnt);
path_put(>upperpath);
-   if (ofs->lowerpaths) {
-   for (i = 0; i < ofs->numlayer; i++)
-   path_put(>lowerpaths[i]);
-   kfree(ofs->lowerpaths);
-   }
for (i = 1; i < ofs->numlayer; i++) {
iput(ofs->layers[i].trap);
mntput(ofs->layers[i].mnt);
@@ -359,9 +354,10 @@ static int ovl_show_options(struct seq_file *m, struct 
dentry *dentry)
 {
struct super_block *sb = dentry->d_sb;
struct ovl_fs *ofs = sb->s_fs_info;
+   struct ovl_entry *oe = OVL_E(dentry);
 
if (ovl_dyn_path_opts) {
-   print_paths_option(m, "lowerdir", ofs->lowerpaths, 
ofs->numlayer);
+   print_paths_option(m, "lowerdir", oe->lowerstack, oe->numlower);
if (ofs->config.upperdir) {
print_path_option(m, "upperdir", >upperpath);
print_path_option(m, "workdir", >workbasepath);
@@ -375,7 +371,8 @@ static int ovl_show_options(struct seq_file *m, struct 
dentry *dentry)
}
 
if (ovl_mnt_id_path_opts) {
-   print_mnt_ids_option(m, "lowerdir_mnt_id", ofs->lowerpaths, 
ofs->numlayer);
+   print_mnt_ids_option(m, "lowerdir_mnt_id", oe->lowerstack, 
oe->numlower);
+
/*
 * We don't need to show mnt_id for workdir because it
 * on the same mount as upperdir.
@@ -1625,6 +1622,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct 
super_block *sb,
 {
int err;
char *lowertmp, *lower;
+   struct path *stack = NULL;
unsigned int stacklen, numlower = 0, i;
struct ovl_entry *oe;
 
@@ -1649,14 +1647,14 @@ static struct ovl_entry *ovl_get_lowerstack(struct 
super_block *sb,
}
 
err = -ENOMEM;
-   ofs->lowerpaths = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
-   if (!ofs->lowerpaths)
+   stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
+   if (!stack)
goto out_err;
 
err = -EINVAL;
lower = lowertmp;
for (numlower = 0; numlower < stacklen; 

[Devel] [PATCH RHEL7 COMMIT] overlayfs: relax capable check for trusted prefix xattrs

2021-01-13 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1160.11.1.vz7.172.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1160.11.1.vz7.172.9
-->
commit 2a2b9806906460afcddda2cd273a9933504ee8b9
Author: Andrey Zhadchenko 
Date:   Wed Jan 13 13:25:35 2021 +0300

overlayfs: relax capable check for trusted prefix xattrs

    ovl_listxattr() additionally checks whether attributes can be shown to
    the user by calling capable(). Change it to ve_capable() to avoid
    problems in containers.

https://jira.sw.ru/browse/PSBM-124532
Signed-off-by: Andrey Zhadchenko 
---
 fs/overlayfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 5d38014..2d7d5a7 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -366,7 +366,7 @@ static bool ovl_can_list(const char *s)
return true;
 
/* Never list trusted.overlay, list other trusted for superuser only */
-   return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN);
+   return !ovl_is_private_xattr(s) && ve_capable(CAP_SYS_ADMIN);
 }
 
 ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RHEL7 COMMIT] Improve vps_dumpable check in __ptrace_may_access()

2021-01-13 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1160.11.1.vz7.172.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1160.11.1.vz7.172.9
-->
commit 9c9eb0f3b907a14875ab307466721a81b68bf3ee
Author: Vasily Averin 
Date:   Wed Jan 13 13:25:20 2021 +0300

Improve vps_dumpable check in __ptrace_may_access()

__ptrace_may_access() is used when someone tries to
readlink /proc/$PID/ns/$SOMENS and gets -EPERM for kthreads
because they lack task->mm and thus "vps_dumpable" field.

Let's return error only for non-kernel threads and follow usual
restrictions for kernel threads.

https://jira.sw.ru/browse/PSBM-92107
Signed-off-by: Vasily Averin 
---
 kernel/ptrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 4b7dd80..a3622fe 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -294,7 +294,7 @@ ok:
rcu_read_unlock();
return -EPERM;
}
-   if (!vps_dumpable && !ve_is_super(get_exec_env())) {
+   if (task->mm && !vps_dumpable && !ve_is_super(get_exec_env())) {
rcu_read_unlock();
return -EPERM;
}
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RHEL7 COMMIT] per-memcg negative dentries accounting

2021-01-13 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1160.11.1.vz7.172.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1160.11.1.vz7.172.9
-->
commit 8d980b969c46db524938d67a252b6f986999a67c
Author: Vasily Averin 
Date:   Wed Jan 13 13:25:44 2021 +0300

per-memcg negative dentries accounting

    This patch adds per-memcg accounting of negative dentries
    and makes the counts visible in userspace via memcg statistics.

https://jira.sw.ru/browse/PSBM-104223
https://bugs.openvz.org/browse/OVZ-7225
Signed-off-by: Vasily Averin 
---
 fs/dcache.c| 24 ++--
 include/linux/memcontrol.h |  3 +++
 mm/memcontrol.c| 36 
 3 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 96ac93f..a9a3bb5 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -393,8 +393,10 @@ static void dentry_unlink_inode(struct dentry * dentry)
struct inode *inode = dentry->d_inode;
__d_clear_type(dentry);
dentry->d_inode = NULL;
-   if (dentry->d_flags & DCACHE_LRU_LIST)
+   if (dentry->d_flags & DCACHE_LRU_LIST) {
this_cpu_inc(nr_dentry_negative);
+   memcg_neg_dentry_inc(dentry);
+   }
hlist_del_init(>d_alias);
dentry_rcuwalk_invalidate(dentry);
spin_unlock(>d_lock);
@@ -432,8 +434,10 @@ static void d_lru_add(struct dentry *dentry)
D_FLAG_VERIFY(dentry, 0);
dentry->d_flags |= DCACHE_LRU_LIST;
this_cpu_inc(nr_dentry_unused);
-   if (d_is_negative(dentry))
+   if (d_is_negative(dentry)) {
this_cpu_inc(nr_dentry_negative);
+   memcg_neg_dentry_inc(dentry);
+   }
WARN_ON_ONCE(!list_lru_add(>d_sb->s_dentry_lru, 
>d_lru));
 }
 
@@ -442,8 +446,10 @@ static void d_lru_del(struct dentry *dentry)
D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
dentry->d_flags &= ~DCACHE_LRU_LIST;
this_cpu_dec(nr_dentry_unused);
-   if (d_is_negative(dentry))
+   if (d_is_negative(dentry)) {
this_cpu_dec(nr_dentry_negative);
+   memcg_neg_dentry_dec(dentry);
+   }
WARN_ON_ONCE(!list_lru_del(>d_sb->s_dentry_lru, 
>d_lru));
 }
 
@@ -474,8 +480,10 @@ static void d_lru_isolate(struct list_lru_one *lru, struct 
dentry *dentry)
D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
dentry->d_flags &= ~DCACHE_LRU_LIST;
this_cpu_dec(nr_dentry_unused);
-   if (d_is_negative(dentry))
+   if (d_is_negative(dentry)) {
this_cpu_dec(nr_dentry_negative);
+   memcg_neg_dentry_dec(dentry);
+   }
list_lru_isolate(lru, >d_lru);
 }
 
@@ -484,8 +492,10 @@ static void d_lru_shrink_move(struct list_lru_one *lru, 
struct dentry *dentry,
 {
D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
dentry->d_flags |= DCACHE_SHRINK_LIST;
-   if (d_is_negative(dentry))
+   if (d_is_negative(dentry)) {
this_cpu_dec(nr_dentry_negative);
+   memcg_neg_dentry_dec(dentry);
+   }
list_lru_isolate_move(lru, >d_lru, list);
 }
 
@@ -1871,8 +1881,10 @@ static void __d_instantiate(struct dentry *dentry, 
struct inode *inode)
/*
 * Decrement negative dentry count if it was in the LRU list.
 */
-   if (dentry->d_flags & DCACHE_LRU_LIST)
+   if (dentry->d_flags & DCACHE_LRU_LIST) {
this_cpu_dec(nr_dentry_negative);
+   memcg_neg_dentry_dec(dentry);
+   }
__d_set_type(dentry, add_flags);
if (inode)
hlist_add_head(>d_alias, >i_dentry);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5e16658..f609a15 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -229,6 +229,9 @@ static inline void mem_cgroup_dec_page_stat(struct page 
*page,
mem_cgroup_update_page_stat(page, idx, -1);
 }
 
+void memcg_neg_dentry_inc(struct dentry *dentry);
+void memcg_neg_dentry_dec(struct dentry *dentry);
+
 void mem_cgroup_fill_vmstat(struct mem_cgroup *memcg, unsigned long *stats);
 
 unsigned long memcg_ws_activates(struct mem_cgroup *memcg);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1c32810..1eae25d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -183,6 +183,7 @@ struct mem_cgroup_stat_cpu {
unsigned long events[MEM_CGROUP_EVENTS_NSTATS];
unsigned long nr_page_events;
unsigned long targets[MEM_CGROUP_NTARGETS];
+   unsigned long nr_dentry_neg;
 };
 
 struct mem_cgroup_stat2_cpu {
@@ -1165,6 +1166,40 @@ mem_cgroup_read_stat2(struct mem_cgroup *memcg, enum 
mem_cgroup_stat2_index idx)
return percpu_counter_sum_positive(>stat2.counters[idx]);
 }
 
+static inline unsigned long
+mem_cgroup_read_nd(struct mem_cgroup *memcg)
+{
+   long val = 0;
+   int cpu;
+
+   /* Per-cpu values can be negative, use a signed accumulator */
+   

[Devel] [PATCH RHEL7 COMMIT] Revert "proc/pid: Don't show kernel threads inside Containers"

2021-01-13 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1160.11.1.vz7.172.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1160.11.1.vz7.172.9
-->
commit 2a0d86b26795f6081c9e84f1b5120dccd933d542
Author: Vasily Averin 
Date:   Wed Jan 13 13:25:27 2021 +0300

Revert "proc/pid: Don't show kernel threads inside Containers"

This reverts commit 685ba4589e1bea845e3234fd14ce0dce295ba3de.
    If kernel threads are not visible inside a container, the
    nfsd service cannot find them and cannot properly report its own status.

    The problem fixed by the reverted patch should instead be fixed by another
    patch that improves the vps_dumpable check in __ptrace_may_access().

https://jira.sw.ru/browse/PSBM-92107
https://bugs.openvz.org/browse/OVZ-7245
Signed-off-by: Vasily Averin 
---
 fs/proc/base.c | 14 --
 1 file changed, 14 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 965a746..dbf5a84 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3300,15 +3300,6 @@ out:
return error;
 }
 
-static bool is_visible_task_ve(struct pid_namespace *ns, struct task_struct 
*task)
-{
-   /* Don't show kthreads inside Containers. */
-   if ((task->flags & PF_KTHREAD) && (ns != _pid_ns))
-   return false;
-
-   return true;
-}
-
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, 
unsigned int flags)
 {
struct dentry *result = NULL;
@@ -3323,8 +3314,6 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct 
dentry * dentry, unsign
ns = dentry->d_sb->s_fs_info;
rcu_read_lock();
task = find_task_by_pid_ns(tgid, ns);
-   if (task && !is_visible_task_ve(ns, task))
-   task = NULL;
if (task)
get_task_struct(task);
rcu_read_unlock();
@@ -3421,9 +3410,6 @@ int proc_pid_readdir(struct file * filp, void * dirent, 
filldir_t filldir)
for (iter = next_tgid(ns, iter);
 iter.task;
 iter.tgid += 1, iter = next_tgid(ns, iter)) {
-   if (!is_visible_task_ve(ns, iter.task))
-   continue;
-
if (is_visible_task(ns, iter.task))
__filldir = filldir;
else
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RHEL7 COMMIT] cbt: Update CBT size from check_disk_size_change()

2021-01-13 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1160.11.1.vz7.172.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1160.11.1.vz7.172.9
-->
commit 042072dc38993013438cc8803e61a7d87501449e
Author: Kirill Tkhai 
Date:   Wed Jan 13 13:25:07 2021 +0300

cbt: Update CBT size from check_disk_size_change()

    There is a customer node where the CBT size differs from the
    ploop size. Searching the kernel code shows that this is the
    only place where we skip the CBT size update after a
    bd_inode size change.

https://jira.sw.ru/browse/PSBM-123819
Signed-off-by: Kirill Tkhai 
---
 fs/block_dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 85e2a0e..e6b8c94 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1083,6 +1083,7 @@ void check_disk_size_change(struct gendisk *disk, struct 
block_device *bdev)
   "%s: detected capacity change from %lld to %lld\n",
   name, bdev_size, disk_size);
i_size_write(bdev->bd_inode, disk_size);
+   blk_cbt_update_size(bdev);
flush_disk(bdev, false);
}
 }
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RHEL7 COMMIT] ploop: Forced clear discard alignment

2021-01-13 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1160.11.1.vz7.172.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1160.11.1.vz7.172.9
-->
commit cb283d02fc12ee9442331de6c9d8fce56feb57b0
Author: Kirill Tkhai 
Date:   Wed Jan 13 13:25:13 2021 +0300

ploop: Forced clear discard alignment

    On a customer node we encountered the following configuration:

/dev/mapper/virtuozzo-vz on /vz type ext4 (rw,noatime,lazytime,data=ordered)

virtuozzo-vz: 0 1046470656 linear 8:19 2048
virtuozzo-vz: 1046470656 209707008 linear 8:20 2048

brw-rw  1 root disk  8, 19 Jan 12 08:51 sdb3
brw-rw  1 root disk  8, 20 Jan 12 08:51 sdb4

$cat /sys/block/sdb/sdb4/start
1048576171

After that, ploop discard becomes unaligned, and we can't
build whole-cluster requests.

$cat /sys/block/ploop23881/discard_alignment
2560

    Since we act on the /vz filesystem and since discard frees filesystem
    blocks, we should inherit nothing discard-related from the underlying
    block device. So, we clear the inherited discard_alignment.

https://jira.sw.ru/browse/PSBM-124496
Signed-off-by: Kirill Tkhai 
---
 include/linux/ploop/ploop.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index 766a0e9f..2256eb4 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -884,6 +884,8 @@ static inline void ploop_set_discard_limits(struct 
ploop_device *plo)
 */
q->limits.discard_granularity = cluster_size_in_bytes(plo);
q->limits.max_discard_sectors = (1 << plo->cluster_log);
+   q->limits.discard_alignment = 0;
+   q->limits.discard_misaligned = 0;
 }
 struct map_node;
 
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RH7] per-memcg negative dentries accounting

2021-01-13 Thread Vasily Averin
This patch adds per-memcg accounting of negative dentries
and makes the counts visible in userspace via memcg statistics.

https://jira.sw.ru/browse/PSBM-104223
https://bugs.openvz.org/browse/OVZ-7225
Signed-off-by: Vasily Averin 
---
 fs/dcache.c| 24 ++--
 include/linux/memcontrol.h |  3 +++
 mm/memcontrol.c| 36 
 3 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 96ac93f..a9a3bb5 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -393,8 +393,10 @@ static void dentry_unlink_inode(struct dentry * dentry)
struct inode *inode = dentry->d_inode;
__d_clear_type(dentry);
dentry->d_inode = NULL;
-   if (dentry->d_flags & DCACHE_LRU_LIST)
+   if (dentry->d_flags & DCACHE_LRU_LIST) {
this_cpu_inc(nr_dentry_negative);
+   memcg_neg_dentry_inc(dentry);
+   }
hlist_del_init(>d_alias);
dentry_rcuwalk_invalidate(dentry);
spin_unlock(>d_lock);
@@ -432,8 +434,10 @@ static void d_lru_add(struct dentry *dentry)
D_FLAG_VERIFY(dentry, 0);
dentry->d_flags |= DCACHE_LRU_LIST;
this_cpu_inc(nr_dentry_unused);
-   if (d_is_negative(dentry))
+   if (d_is_negative(dentry)) {
this_cpu_inc(nr_dentry_negative);
+   memcg_neg_dentry_inc(dentry);
+   }
WARN_ON_ONCE(!list_lru_add(>d_sb->s_dentry_lru, 
>d_lru));
 }
 
@@ -442,8 +446,10 @@ static void d_lru_del(struct dentry *dentry)
D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
dentry->d_flags &= ~DCACHE_LRU_LIST;
this_cpu_dec(nr_dentry_unused);
-   if (d_is_negative(dentry))
+   if (d_is_negative(dentry)) {
this_cpu_dec(nr_dentry_negative);
+   memcg_neg_dentry_dec(dentry);
+   }
WARN_ON_ONCE(!list_lru_del(>d_sb->s_dentry_lru, 
>d_lru));
 }
 
@@ -474,8 +480,10 @@ static void d_lru_isolate(struct list_lru_one *lru, struct 
dentry *dentry)
D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
dentry->d_flags &= ~DCACHE_LRU_LIST;
this_cpu_dec(nr_dentry_unused);
-   if (d_is_negative(dentry))
+   if (d_is_negative(dentry)) {
this_cpu_dec(nr_dentry_negative);
+   memcg_neg_dentry_dec(dentry);
+   }
list_lru_isolate(lru, >d_lru);
 }
 
@@ -484,8 +492,10 @@ static void d_lru_shrink_move(struct list_lru_one *lru, 
struct dentry *dentry,
 {
D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
dentry->d_flags |= DCACHE_SHRINK_LIST;
-   if (d_is_negative(dentry))
+   if (d_is_negative(dentry)) {
this_cpu_dec(nr_dentry_negative);
+   memcg_neg_dentry_dec(dentry);
+   }
list_lru_isolate_move(lru, >d_lru, list);
 }
 
@@ -1871,8 +1881,10 @@ static void __d_instantiate(struct dentry *dentry, 
struct inode *inode)
/*
 * Decrement negative dentry count if it was in the LRU list.
 */
-   if (dentry->d_flags & DCACHE_LRU_LIST)
+   if (dentry->d_flags & DCACHE_LRU_LIST) {
this_cpu_dec(nr_dentry_negative);
+   memcg_neg_dentry_dec(dentry);
+   }
__d_set_type(dentry, add_flags);
if (inode)
hlist_add_head(>d_alias, >i_dentry);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5e16658..f609a15 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -229,6 +229,9 @@ static inline void mem_cgroup_dec_page_stat(struct page 
*page,
mem_cgroup_update_page_stat(page, idx, -1);
 }
 
+void memcg_neg_dentry_inc(struct dentry *dentry);
+void memcg_neg_dentry_dec(struct dentry *dentry);
+
 void mem_cgroup_fill_vmstat(struct mem_cgroup *memcg, unsigned long *stats);
 
 unsigned long memcg_ws_activates(struct mem_cgroup *memcg);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1c32810..1eae25d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -183,6 +183,7 @@ struct mem_cgroup_stat_cpu {
unsigned long events[MEM_CGROUP_EVENTS_NSTATS];
unsigned long nr_page_events;
unsigned long targets[MEM_CGROUP_NTARGETS];
+   unsigned long nr_dentry_neg;
 };
 
 struct mem_cgroup_stat2_cpu {
@@ -1165,6 +1166,40 @@ mem_cgroup_read_stat2(struct mem_cgroup *memcg, enum 
mem_cgroup_stat2_index idx)
return percpu_counter_sum_positive(>stat2.counters[idx]);
 }
 
+static inline unsigned long
+mem_cgroup_read_nd(struct mem_cgroup *memcg)
+{
+   long val = 0;
+   int cpu;
+
+   /* Per-cpu values can be negative, use a signed accumulator */
+   for_each_possible_cpu(cpu)
+   val += per_cpu(memcg->stat->nr_dentry_neg, cpu);
+   /*
+* Summing races with updates, so val may be negative.  Avoid exposing
+* transient negative values.
+*/
+   if (val < 0)
+   val = 0;
+   return val;
+}
+
+void memcg_neg_dentry_inc(struct dentry *dentry)
+{
+