On Tue, Dec 08, 2015 at 06:52:50PM +0300, Andrey Ryabinin wrote:
> tcache_invalidate_node_pages() temporarily drops/takes back node->tree_lock.
> Once lock was dropped, another thread might remove and free the next slot.
> Don't drop the lock.
> 
> https://jira.sw.ru/browse/PSBM-42104
> 
> Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
> ---
>  mm/tcache.c | 13 +++----------
>  1 file changed, 3 insertions(+), 10 deletions(-)
> 
> diff --git a/mm/tcache.c b/mm/tcache.c
> index b8757cf..9bb88b4 100644
> --- a/mm/tcache.c
> +++ b/mm/tcache.c
> @@ -121,8 +121,9 @@ static struct tcache_lru *tcache_lru_node;
>  /*
>   * Locking rules:
>   *
> - * - tcache_node_tree->lock nests inside tcache_node->tree_lock
> - * - tcache_lru->lock is independent
> + *   tcache_node->tree_lock
> + *        tcache_node_tree->lock
> + *        tcache_lru->lock
>   */
>  
>  /* Enable/disable tcache backend (set at boot time) */
> @@ -677,16 +678,8 @@ tcache_invalidate_node_pages(struct tcache_node *node)
>       radix_tree_for_each_slot(slot, &node->page_tree, &iter, 0) {
>               page = radix_tree_deref_slot_protected(slot, &node->tree_lock);
>               BUG_ON(!__tcache_page_tree_delete(node, page->index, page));
> -             spin_unlock(&node->tree_lock);
> -
>               tcache_lru_del(page);
>               put_page(page);
> -
> -             local_irq_enable();
> -             cond_resched();
> -             local_irq_disable();
> -
> -             spin_lock(&node->tree_lock);

This might be OK as a temporary solution, but I think we still should
drop the lock periodically, because this iteration can take long (think
of a 10G file cached in tcache) so performing it w/o rescheduling and,
what is worse, with irqs disabled is a no-go IMO.

I would propose to make use of the ability of radix_tree_for_each_slot
to continue iteration from the specified index. Something like this
would do the trick I suppose (NOT TESTED):

diff --git a/mm/tcache.c b/mm/tcache.c
index b8757cf354a9..6a4c45970293 100644
--- a/mm/tcache.c
+++ b/mm/tcache.c
@@ -662,6 +662,10 @@ tcache_invalidate_node_pages(struct tcache_node *node)
        struct radix_tree_iter iter;
        struct page *page;
        void **slot;
+       pgoff_t index = 0;
+       int batch;
+
+#define TCACHE_INVALIDATE_BATCH                128
 
        spin_lock_irq(&node->tree_lock);
 
@@ -674,19 +678,26 @@ tcache_invalidate_node_pages(struct tcache_node *node)
         * Now truncate all pages. Be careful, because pages can still be
         * deleted from this node by the shrinker or by concurrent lookups.
         */
-       radix_tree_for_each_slot(slot, &node->page_tree, &iter, 0) {
+restart:
+       batch = 0;
+       radix_tree_for_each_slot(slot, &node->page_tree, &iter, index) {
                page = radix_tree_deref_slot_protected(slot, &node->tree_lock);
-               BUG_ON(!__tcache_page_tree_delete(node, page->index, page));
-               spin_unlock(&node->tree_lock);
-
+               index = page->index;
+               BUG_ON(!__tcache_page_tree_delete(node, index, page));
                tcache_lru_del(page);
                put_page(page);
 
-               local_irq_enable();
-               cond_resched();
-               local_irq_disable();
-
-               spin_lock(&node->tree_lock);
+               if (++batch > TCACHE_INVALIDATE_BATCH) {
+                       spin_unlock_irq(&node->tree_lock);
+                       cond_resched();
+                       spin_lock_irq(&node->tree_lock);
+                       /*
+                        * Restart iteration over the radix tree, because the
+                        * current node could have been freed when we dropped
+                        * the lock.
+                        */
+                       goto restart;
+               }
        }
 
        BUG_ON(node->nr_pages != 0);
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to