When a page has more than 2 users, __tcache_page_tree_delete() fails to
freeze it. We skip the page and never try to freeze it again.

In this case the page is never invalidated, and tcache_node->nr_pages is
never decremented. Later, we hit the WARN_ON() that reports this.

tcache_shrink_scan()                          tcache_destroy_pool
   tcache_lru_isolate()
      tcache_grab_pool()
      ...
      page_cache_get_speculative() -->cnt == 2

      ...
      tcache_put_pool() --> pool cnt zero
      ...                                        wait_for_completion(&pool->completion);
   tcache_reclaim_pages                          tcache_invalidate_node_pages()
      __tcache_reclaim_page()                       tcache_lookup()
                                                       page_cache_get_speculative  --> cnt == 3
                                                    __tcache_page_tree_delete
        page_ref_freeze(2) -->fail                     page_ref_freeze(2) -->fail

The patch fixes the problem. If we fail to invalidate a page, we remember
this and return to such pages after the others have been invalidated.

https://jira.sw.ru/browse/PSBM-78354

v2: Also fix tcache_detach_page()

Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com>
---
 mm/tcache.c |   21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/mm/tcache.c b/mm/tcache.c
index d1a2c53e11a3..760e417d491b 100644
--- a/mm/tcache.c
+++ b/mm/tcache.c
@@ -850,6 +850,14 @@ static struct page *tcache_detach_page(struct tcache_node *node, pgoff_t index,
                if (page)
                        tcache_lru_del(node->pool, page, reused);
                local_irq_restore(flags);
+               /*
+                * The shrinker could have isolated the page in
+                * parallel with us. In this case
+                * page_ref_freeze(page, 2) in
+                * __tcache_page_tree_delete() fails, and we have
+                * to repeat the cycle.
+                */
+               if (!page)
+                       goto repeat;
        }
 
        return page;
@@ -903,13 +911,15 @@ tcache_invalidate_node_pages(struct tcache_node *node)
        struct page *pages[TCACHE_PAGEVEC_SIZE];
        pgoff_t index = 0;
        unsigned nr_pages;
+       bool repeat;
        int i;
 
        /*
         * First forbid new page insertions - see tcache_page_tree_replace.
         */
        node->invalidated = true;
-
+again:
+       repeat = false;
        while ((nr_pages = tcache_lookup(pages, node, index,
                                                TCACHE_PAGEVEC_SIZE, indices))) {
                for (i = 0; i < nr_pages; i++) {
@@ -925,13 +935,20 @@ tcache_invalidate_node_pages(struct tcache_node *node)
                                tcache_lru_del(node->pool, page, false);
                                local_irq_enable();
                                tcache_put_page(page);
-                       } else
+                       } else {
                                local_irq_enable();
+                               repeat = true;
+                       }
                }
                cond_resched();
                index++;
        }
 
+       if (repeat) {
+               index = 0;
+               goto again;
+       }
+
        WARN_ON(node->nr_pages != 0);
 }
 

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to