[Devel] [PATCH vz8] mm, memcg: add oom counter to memory.stat memcgroup file

2020-10-02 Thread Andrey Ryabinin
Add oom counter to memory.stat file. oom shows the number of oom kills
triggered due to the cgroup's memory limit. total_oom shows the total sum of
oom kills triggered due to the memory limits of the cgroup and its sub-groups.

memory.stat in the root cgroup counts global oom kills.

E.g:
 # mkdir /sys/fs/cgroup/memory/test/
 # echo 100M > /sys/fs/cgroup/memory/test/memory.limit_in_bytes
 # echo 100M > /sys/fs/cgroup/memory/test/memory.memsw.limit_in_bytes
 # echo $$ > /sys/fs/cgroup/memory/test/tasks
 # ./vm-scalability/usemem -O 200M
 # grep oom /sys/fs/cgroup/memory/test/memory.stat
   oom 1
   total_oom 1
 # echo -1 > /sys/fs/cgroup/memory/test/memory.memsw.limit_in_bytes
 # echo -1 > /sys/fs/cgroup/memory/test/memory.limit_in_bytes
 # ./vm-scalability/usemem -O 1000G
 # grep oom /sys/fs/cgroup/memory/memory.stat
oom 1
total_oom 2

https://jira.sw.ru/browse/PSBM-108287
Signed-off-by: Andrey Ryabinin 
---
 include/linux/memcontrol.h |  2 ++
 mm/memcontrol.c| 33 ++---
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b097f137a3df..eb8634128a81 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -75,6 +75,8 @@ struct accumulated_stats {
unsigned long stat[MEMCG_NR_STAT];
unsigned long events[NR_VM_EVENT_ITEMS];
unsigned long lru_pages[NR_LRU_LISTS];
+   unsigned long oom;
+   unsigned long oom_kill;
const unsigned int *stats_array;
const unsigned int *events_array;
int stats_size;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 37d4df653f39..ca3a07543416 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3144,6 +3144,8 @@ void accumulate_memcg_tree(struct mem_cgroup *memcg,
for (i = 0; i < NR_LRU_LISTS; i++)
acc->lru_pages[i] +=
mem_cgroup_nr_lru_pages(mi, BIT(i));
+   acc->oom += atomic_long_read(>memory_events[MEMCG_OOM]);
+   acc->oom_kill += 
atomic_long_read(>memory_events[MEMCG_OOM_KILL]);
 
cond_resched();
}
@@ -3899,6 +3901,13 @@ static int memcg_stat_show(struct seq_file *m, void *v)
BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats));
BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
 
+   memset(, 0, sizeof(acc));
+   acc.stats_size = ARRAY_SIZE(memcg1_stats);
+   acc.stats_array = memcg1_stats;
+   acc.events_size = ARRAY_SIZE(memcg1_events);
+   acc.events_array = memcg1_events;
+   accumulate_memcg_tree(memcg, );
+
for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
continue;
@@ -3911,6 +3920,18 @@ static int memcg_stat_show(struct seq_file *m, void *v)
seq_printf(m, "%s %lu\n", memcg1_event_names[i],
   memcg_sum_events(memcg, memcg1_events[i]));
 
+   /*
+* For root_mem_cgroup we want to account global ooms as well.
+* The diff between all MEMCG_OOM_KILL and MEMCG_OOM events
+* should give us the global ooms count.
+*/
+   if (memcg == root_mem_cgroup)
+   seq_printf(m, "oom %lu\n", acc.oom_kill - acc.oom +
+   atomic_long_read(>memory_events[MEMCG_OOM]));
+   else
+   seq_printf(m, "oom %lu\n",
+   atomic_long_read(>memory_events[MEMCG_OOM]));
+
for (i = 0; i < NR_LRU_LISTS; i++)
seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i],
   mem_cgroup_nr_lru_pages(memcg, BIT(i)) * PAGE_SIZE);
@@ -3927,13 +3948,6 @@ static int memcg_stat_show(struct seq_file *m, void *v)
seq_printf(m, "hierarchical_memsw_limit %llu\n",
   (u64)memsw * PAGE_SIZE);
 
-   memset(, 0, sizeof(acc));
-   acc.stats_size = ARRAY_SIZE(memcg1_stats);
-   acc.stats_array = memcg1_stats;
-   acc.events_size = ARRAY_SIZE(memcg1_events);
-   acc.events_array = memcg1_events;
-   accumulate_memcg_tree(memcg, );
-
for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
continue;
@@ -3945,6 +3959,11 @@ static int memcg_stat_show(struct seq_file *m, void *v)
seq_printf(m, "total_%s %llu\n", memcg1_event_names[i],
   (u64)acc.events[i]);
 
+   if (memcg == root_mem_cgroup)
+   seq_printf(m, "total_oom %lu\n", acc.oom_kill);
+   else
+   seq_printf(m, "total_oom %lu\n", acc.oom);
+
for (i = 0; i < NR_LRU_LISTS; i++)
seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i],
   (u64)acc.lru_pages[i] * PAGE_SIZE);
-- 
2.26.2

___
Devel 

[Devel] [PATCH vz8 2/2] mm/memcg: fix cache growth above cache.limit_in_bytes

2020-10-02 Thread Andrey Ryabinin
Exceeding cache.limit_in_bytes schedules high_work_func(), which tries
to reclaim 32 pages. If cache is generated fast enough, this allows the
cgroup to steadily grow above cache.limit_in_bytes because we don't reclaim
enough. Try to reclaim the exceeded amount of cache instead.

https://jira.sw.ru/browse/PSBM-106384
Signed-off-by: Andrey Ryabinin 
---
 mm/memcontrol.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c30150b8732d..37d4df653f39 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2213,14 +2213,18 @@ static void reclaim_high(struct mem_cgroup *memcg,
 {
 
do {
+   long cache_overused;
 
if (page_counter_read(>memory) > memcg->high) {
memcg_memory_event(memcg, MEMCG_HIGH);
try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, 
true);
}
 
-   if (page_counter_read(>cache) > memcg->cache.max)
-   try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, 
false);
+   cache_overused = page_counter_read(>cache) -
+   memcg->cache.max;
+
+   if (cache_overused > 0)
+   try_to_free_mem_cgroup_pages(memcg, cache_overused, 
gfp_mask, false);
} while ((memcg = parent_mem_cgroup(memcg)));
 }
 
-- 
2.26.2

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH vz8 1/2] mm/memcg: reclaim memory.cache.limit_in_bytes from background

2020-10-02 Thread Andrey Ryabinin
Always reclaiming memory above memory.cache.limit_in_bytes in direct
reclaim mode adds too much of a cost for vstorage. Instead of direct
reclaim, allow memory.cache.limit_in_bytes to be exceeded, but launch
the reclaim in a background task.

https://pmc.acronis.com/browse/VSTOR-24395
https://jira.sw.ru/browse/PSBM-94761
Signed-off-by: Andrey Ryabinin 
---
 mm/memcontrol.c | 42 ++
 1 file changed, 18 insertions(+), 24 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 68242a72be4d..c30150b8732d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2211,11 +2211,16 @@ static void reclaim_high(struct mem_cgroup *memcg,
 unsigned int nr_pages,
 gfp_t gfp_mask)
 {
+
do {
-   if (page_counter_read(>memory) <= memcg->high)
-   continue;
-   memcg_memory_event(memcg, MEMCG_HIGH);
-   try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
+
+   if (page_counter_read(>memory) > memcg->high) {
+   memcg_memory_event(memcg, MEMCG_HIGH);
+   try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, 
true);
+   }
+
+   if (page_counter_read(>cache) > memcg->cache.max)
+   try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, 
false);
} while ((memcg = parent_mem_cgroup(memcg)));
 }
 
@@ -2270,13 +2275,8 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t 
gfp_mask, bool kmem_charge
refill_stock(memcg, nr_pages);
goto charge;
}
-
-   if (cache_charge && !page_counter_try_charge(
-   >cache, nr_pages, )) {
-   refill_stock(memcg, nr_pages);
-   goto charge;
-   }
-   return 0;
+   css_get_many(>css, batch);
+   goto done;
}
 
 charge:
@@ -2301,19 +2301,6 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t 
gfp_mask, bool kmem_charge
}
}
 
-   if (!mem_over_limit && cache_charge) {
-   if (page_counter_try_charge(>cache, nr_pages, ))
-   goto done_restock;
-
-   may_swap = false;
-   mem_over_limit = mem_cgroup_from_counter(counter, cache);
-   page_counter_uncharge(>memory, batch);
-   if (do_memsw_account())
-   page_counter_uncharge(>memsw, batch);
-   if (kmem_charge)
-   page_counter_uncharge(>kmem, nr_pages);
-   }
-
if (!mem_over_limit)
goto done_restock;
 
@@ -2437,6 +2424,9 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t 
gfp_mask, bool kmem_charge
css_get_many(>css, batch);
if (batch > nr_pages)
refill_stock(memcg, batch - nr_pages);
+done:
+   if (cache_charge)
+   page_counter_charge(>cache, nr_pages);
 
/*
 * If the hierarchy is above the normal consumption range, schedule
@@ -2457,7 +2447,11 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t 
gfp_mask, bool kmem_charge
current->memcg_nr_pages_over_high += batch;
set_notify_resume(current);
break;
+   } else if (page_counter_read(>cache) > memcg->cache.max) 
{
+   if (!work_pending(>high_work))
+   schedule_work(>high_work);
}
+
} while ((memcg = parent_mem_cgroup(memcg)));
 
return 0;
-- 
2.26.2

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RHEL8 COMMIT] tmpfs: shmem_fallocate must return ERESTARTSYS

2020-10-02 Thread Konstantin Khorenko
The commit is pushed to "branch-rh8-4.18.0-193.6.3.vz8.4.x-ovz" and will appear 
at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-193.6.3.vz8.4.11
-->
commit a1e224e1275b5e96d90edba96e3cb43a25437ab1
Author: Maxim Patlasov 
Date:   Fri Mar 18 15:51:06 2016 +0400

tmpfs: shmem_fallocate must return ERESTARTSYS

shmem_fallocate() is restartable, so it can return ERESTARTSYS if
signal_pending(). Although fallocate(2) manpage permits EINTR,
the more places use ERESTARTSYS the better.

https://jira.sw.ru/browse/PSBM-43399

Signed-off-by: Maxim Patlasov 
Acked-by: Dmitry Monakhov 

mpatlasov@: note, shmem_fallocate() doesn't return ERESTARTSYS to userspace.
It returns ERESTARTSYS to some internal kernel function that restarts the
syscall (I tested this behavior). This is how any other
"return -ERESTARTSYS;" works.

(cherry picked from vz7 commit 0011c60ac1fde1f735d2474c967b454b9019461a)
Signed-off-by: Konstantin Khorenko 
---
 mm/shmem.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 90211848f742..a2b08da45ac3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2742,11 +2742,13 @@ static long shmem_fallocate(struct file *file, int 
mode, loff_t offset,
struct page *page;
 
/*
-* Good, the fallocate(2) manpage permits EINTR: we may have
-* been interrupted because we are using up too much memory.
+* Although fallocate(2) manpage permits EINTR, the more
+* places use ERESTARTSYS the better. If we have been
+* interrupted because we are using up too much memory,
+* oom-killer used fatal signal and we will die anyway.
 */
if (signal_pending(current))
-   error = -EINTR;
+   error = -ERESTARTSYS;
else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
error = -ENOMEM;
else
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RHEL8 COMMIT] mm/tcache: restore missing rcu_read_lock() in tcache_detach_page() #PSBM-120802

2020-10-02 Thread Konstantin Khorenko
The commit is pushed to "branch-rh8-4.18.0-193.6.3.vz8.4.x-ovz" and will appear 
at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-193.6.3.vz8.4.11
-->
commit f5360935ed2747e922dce38ea5fb9bf9aa94f589
Author: Evgenii Shatokhin 
Date:   Fri Oct 2 18:25:48 2020 +0300

mm/tcache: restore missing rcu_read_lock() in tcache_detach_page() 
#PSBM-120802

Looks like rcu_read_lock() was lost in "out:" path of tcache_detach_page()
when tcache was ported to VZ8. As a result, Syzkaller was able to hit
the following warning:

  WARNING: bad unlock balance detected!
  4.18.0-193.6.3.vz8.4.7.syz+debug #1 Tainted: GW
-r-  -
  -
  vcmmd/926 is trying to release lock (rcu_read_lock) at:
  [] tcache_detach_page+0x530/0x750
  but there are no more locks to release!

  other info that might help us debug this:
  2 locks held by vcmmd/926:
   #0: 888036331f30 (>mmap_sem){}, at: 
__do_page_fault+0x157/0x550
   #1: 8880567295f8 (>i_mmap_sem){}, at: 
ext4_filemap_fault+0x82/0xc0 [ext4]

  stack backtrace:
  CPU: 0 PID: 926 Comm: vcmmd ve: /
   Tainted: GW-r-  - 
4.18.0-193.6.3.vz8.4.7.syz+debug #1 4.7
  Hardware name: Virtuozzo KVM, BIOS 1.11.0-2.vz7.2 04/01/2014
  Call Trace:
   dump_stack+0xd2/0x148
   print_unlock_imbalance_bug.cold.40+0xc8/0xd4
   lock_release+0x5e3/0x1360
   tcache_detach_page+0x559/0x750
   tcache_cleancache_get_page+0xe9/0x780
   __cleancache_get_page+0x212/0x320
   ext4_mpage_readpages+0x165d/0x1b90 [ext4]
   ext4_readpages+0xd6/0x110 [ext4]
   read_pages+0xff/0x5b0
   __do_page_cache_readahead+0x3fc/0x5b0
   filemap_fault+0x912/0x1b80
   ext4_filemap_fault+0x8a/0xc0 [ext4]
   __do_fault+0x110/0x410
   do_fault+0x622/0x1010
   __handle_mm_fault+0x980/0x1120
   handle_mm_fault+0x17f/0x610
   __do_page_fault+0x25d/0x550
   do_page_fault+0x38/0x290
   do_async_page_fault+0x5b/0xe0
   async_page_fault+0x1e/0x30

Let us restore rcu_read_lock().

https://jira.sw.ru/browse/PSBM-120802
Fixes: a4831db86d48 ("mm: introduce transcendent file cache")
Fix in vz7: 152239c6c3b2 ("mm/tcache: fix rcu_read_lock()/rcu_read_unlock()
imbalance")

Signed-off-by: Evgenii Shatokhin 
Reviewed-by: Andrey Ryabinin 
---
 mm/tcache.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mm/tcache.c b/mm/tcache.c
index c7c5008fdac8..c40cf5de2dd8 100644
--- a/mm/tcache.c
+++ b/mm/tcache.c
@@ -853,8 +853,10 @@ static struct page *tcache_detach_page(struct tcache_node 
*node, pgoff_t index,
 * in __tcache_page_tree_delete() fails, and
 * we have to repeat the cycle.
 */
-   if (!page)
+   if (!page) {
+   rcu_read_lock();
goto repeat;
+   }
}
 
return page;
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RHEL8 COMMIT] VE/sysrq-trigger: make it VE-namespace aware

2020-10-02 Thread Konstantin Khorenko
The commit is pushed to "branch-rh8-4.18.0-193.6.3.vz8.4.x-ovz" and will appear 
at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-193.6.3.vz8.4.11
-->
commit a4534c36188448fdd9d55dfbc06e28dbc4ae1637
Author: Stanislav Kinsbursky 
Date:   Wed Dec 18 13:11:04 2013 +0400

VE/sysrq-trigger: make it VE-namespace aware

Logic inherited from rhel6: instead of doing something, echo the passed
command in the container.

https://jira.sw.ru/browse/PSBM-20664

Signed-off-by: Stanislav Kinsbursky 

==
Original comment:

It's backport of patch:
diff-ve-fake-sysrq-trigger-in-ct-20091002

Patch from Konstantin Khorenko :
provides /proc/sysrq-trigger file inside a Container -
Oracle 11g Release 1 RAC needs this.

Writing to the file inside a CT does nothing; the first 10 writes are
logged.

https://bugzilla.sw.ru/show_bug.cgi?id=448149
==

(cherry picked from vz7 commit 103e7fa4e8e182287162a202cbab188fdec8509f)
Signed-off-by: Konstantin Khorenko 
---
 drivers/tty/sysrq.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 22973a5c59a2..40544af4d36f 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -50,6 +50,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -1105,10 +1106,16 @@ static ssize_t write_sysrq_trigger(struct file *file, 
const char __user *buf,
 {
if (count) {
char c;
+   struct ve_struct *cur = get_exec_env();
+   static int pnum = 10;
 
if (get_user(c, buf))
return -EFAULT;
-   __handle_sysrq(c, false);
+   if (ve_is_super(cur))
+   __handle_sysrq(c, false);
+   else if (pnum--)
+   printk("SysRq: CT#%s sent '%c' magic key.\n",
+   cur->ve_name, c);
}
 
return count;
@@ -1121,7 +1128,7 @@ static const struct file_operations 
proc_sysrq_trigger_operations = {
 
 static void sysrq_init_procfs(void)
 {
-   if (!proc_create("sysrq-trigger", S_IWUSR, NULL,
+   if (!proc_create("sysrq-trigger", S_ISVTX | S_IWUSR, NULL,
 _sysrq_trigger_operations))
pr_err("Failed to register proc interface\n");
 }
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [PATCH RH8] mm/tcache: restore missing rcu_read_lock() in tcache_detach_page()

2020-10-02 Thread Andrey Ryabinin



On 10/2/20 5:13 PM, Evgenii Shatokhin wrote:
> Looks like rcu_read_lock() was lost in "out:" path of tcache_detach_page()
> when tcache was ported to VZ8. As a result, Syzkaller was able to hit
> the following warning:
> 
>   WARNING: bad unlock balance detected!
>   4.18.0-193.6.3.vz8.4.7.syz+debug #1 Tainted: GW-r-  
> -
>   -
>   vcmmd/926 is trying to release lock (rcu_read_lock) at:
>   [] tcache_detach_page+0x530/0x750
>   but there are no more locks to release!
> 
>   other info that might help us debug this:
>   2 locks held by vcmmd/926:
>#0: 888036331f30 (>mmap_sem){}, at: __do_page_fault+0x157/0x550
>#1: 8880567295f8 (>i_mmap_sem){}, at: 
> ext4_filemap_fault+0x82/0xc0 [ext4]
> 
>   stack backtrace:
>   CPU: 0 PID: 926 Comm: vcmmd ve: /
>Tainted: GW-r-  - 
> 4.18.0-193.6.3.vz8.4.7.syz+debug #1 4.7
>   Hardware name: Virtuozzo KVM, BIOS 1.11.0-2.vz7.2 04/01/2014
>   Call Trace:
>dump_stack+0xd2/0x148
>print_unlock_imbalance_bug.cold.40+0xc8/0xd4
>lock_release+0x5e3/0x1360
>tcache_detach_page+0x559/0x750
>tcache_cleancache_get_page+0xe9/0x780
>__cleancache_get_page+0x212/0x320
>ext4_mpage_readpages+0x165d/0x1b90 [ext4]
>ext4_readpages+0xd6/0x110 [ext4]
>read_pages+0xff/0x5b0
>__do_page_cache_readahead+0x3fc/0x5b0
>filemap_fault+0x912/0x1b80
>ext4_filemap_fault+0x8a/0xc0 [ext4]
>__do_fault+0x110/0x410
>do_fault+0x622/0x1010
>__handle_mm_fault+0x980/0x1120
>handle_mm_fault+0x17f/0x610
>__do_page_fault+0x25d/0x550
>do_page_fault+0x38/0x290
>do_async_page_fault+0x5b/0xe0
>async_page_fault+0x1e/0x30
> 
> Let us restore rcu_read_lock().
> 
> https://jira.sw.ru/browse/PSBM-120802
> Signed-off-by: Evgenii Shatokhin 

Reviewed-by: Andrey Ryabinin 
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RH8] mm/tcache: restore missing rcu_read_lock() in tcache_detach_page()

2020-10-02 Thread Evgenii Shatokhin
Looks like rcu_read_lock() was lost in "out:" path of tcache_detach_page()
when tcache was ported to VZ8. As a result, Syzkaller was able to hit
the following warning:

  WARNING: bad unlock balance detected!
  4.18.0-193.6.3.vz8.4.7.syz+debug #1 Tainted: GW-r-  -
  -
  vcmmd/926 is trying to release lock (rcu_read_lock) at:
  [] tcache_detach_page+0x530/0x750
  but there are no more locks to release!

  other info that might help us debug this:
  2 locks held by vcmmd/926:
   #0: 888036331f30 (>mmap_sem){}, at: __do_page_fault+0x157/0x550
   #1: 8880567295f8 (>i_mmap_sem){}, at: 
ext4_filemap_fault+0x82/0xc0 [ext4]

  stack backtrace:
  CPU: 0 PID: 926 Comm: vcmmd ve: /
   Tainted: GW-r-  - 
4.18.0-193.6.3.vz8.4.7.syz+debug #1 4.7
  Hardware name: Virtuozzo KVM, BIOS 1.11.0-2.vz7.2 04/01/2014
  Call Trace:
   dump_stack+0xd2/0x148
   print_unlock_imbalance_bug.cold.40+0xc8/0xd4
   lock_release+0x5e3/0x1360
   tcache_detach_page+0x559/0x750
   tcache_cleancache_get_page+0xe9/0x780
   __cleancache_get_page+0x212/0x320
   ext4_mpage_readpages+0x165d/0x1b90 [ext4]
   ext4_readpages+0xd6/0x110 [ext4]
   read_pages+0xff/0x5b0
   __do_page_cache_readahead+0x3fc/0x5b0
   filemap_fault+0x912/0x1b80
   ext4_filemap_fault+0x8a/0xc0 [ext4]
   __do_fault+0x110/0x410
   do_fault+0x622/0x1010
   __handle_mm_fault+0x980/0x1120
   handle_mm_fault+0x17f/0x610
   __do_page_fault+0x25d/0x550
   do_page_fault+0x38/0x290
   do_async_page_fault+0x5b/0xe0
   async_page_fault+0x1e/0x30

Let us restore rcu_read_lock().

https://jira.sw.ru/browse/PSBM-120802
Signed-off-by: Evgenii Shatokhin 
---
 mm/tcache.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mm/tcache.c b/mm/tcache.c
index c7c5008fdac8..c40cf5de2dd8 100644
--- a/mm/tcache.c
+++ b/mm/tcache.c
@@ -853,8 +853,10 @@ static struct page *tcache_detach_page(struct tcache_node 
*node, pgoff_t index,
 * in __tcache_page_tree_delete() fails, and
 * we have to repeat the cycle.
 */
-   if (!page)
+   if (!page) {
+   rcu_read_lock();
goto repeat;
+   }
}
 
return page;
-- 
2.27.0

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RHEL7 COMMIT] ploop: Change type of freezing argument on snapshot

2020-10-02 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.vz7.163.31
-->
commit e31114211ecd7c1a75fe7dc4e8dbbfdf96615db3
Author: Kirill Tkhai 
Date:   Fri Oct 2 13:10:04 2020 +0300

ploop: Change type of freezing argument on snapshot

This interface is currently unused, so we can change it.
Before we start using it, Igor requested the possibility
to pass a partition/block device/crypto target instead
of a mount point.

This patch changes the type of the argument: now an open
block device fd is required. Also add a sanity check
of sync_fd: if sync_fd == -1 is passed, we do not
distinguish it from the case when it is not passed at all.

https://jira.sw.ru/browse/PSBM-107925
Signed-off-by: Kirill Tkhai 
---
 drivers/block/ploop/dev.c | 21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 81340f9..d6edbfb 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -3680,20 +3680,23 @@ static int get_bdev_from_fd(int fd, struct block_device 
**bdev)
 {
struct file *file = fget(fd);
struct inode *inode;
-   int ret = -ENODEV;
+   int ret;
 
if (!file)
-   return -ENODEV;
+   return -EBADF;
 
-   inode = file_inode(file);
+   ret = -ENODEV;
+   inode = file->f_mapping->host;
if (!inode)
goto fput;
 
-   *bdev = inode->i_sb->s_bdev;
-   if (*bdev) {
-   bdgrab(*bdev);
-   ret = 0;
-   }
+   ret = -ENOTBLK;
+   if (!S_ISBLK(inode->i_mode))
+   goto fput;
+
+   ret = 0;
+   *bdev = I_BDEV(inode);
+   bdgrab(*bdev);
 fput:
fput(file);
return ret;
@@ -3780,6 +3783,8 @@ static int ploop_snapshot(struct ploop_device * plo, 
unsigned long arg,
return -EINVAL;
/* The rest of fields are ignored */
sync_fd = chunk.pctl_fd;
+   if (sync_fd < 0)
+   return -EBADF;
ctl.pctl_chunks = 1;
}
if (ctl.pctl_chunks != 1)
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel