On Mon 07-11-16 14:07:39, Johannes Weiner wrote:
> The bug in khugepaged (fixed in the first patch of this series) shows
> that radix tree slot replacement is fragile; and it will become more
> so when not only NULL<->!NULL transitions need to be accounted but
> transitions from and to exceptional entries as well. We need checks.
> 
> Re-implement radix_tree_replace_slot() on top of the sanity-checked
> __radix_tree_replace(). This requires existing callers to also pass
> the radix tree root, but it'll warn us when somebody replaces slots
> with contents that need proper accounting (transitions between NULL
> entries, real entries, exceptional entries) and where a replacement
> through the slot pointer would corrupt the radix tree node counts.
> 
> Suggested-by: Jan Kara <j...@suse.cz>
> Signed-off-by: Johannes Weiner <han...@cmpxchg.org>

The patch looks good. You can add:

Reviewed-by: Jan Kara <j...@suse.cz>

                                                                Honza

> ---
>  arch/s390/mm/gmap.c                   |  2 +-
>  drivers/sh/intc/virq.c                |  2 +-
>  fs/dax.c                              |  4 +--
>  include/linux/radix-tree.h            | 16 ++-------
>  lib/radix-tree.c                      | 64 
> +++++++++++++++++++++++++++--------
>  mm/filemap.c                          |  4 +--
>  mm/khugepaged.c                       |  5 +--
>  mm/migrate.c                          |  4 +--
>  mm/truncate.c                         |  2 +-
>  tools/testing/radix-tree/multiorder.c |  2 +-
>  10 files changed, 64 insertions(+), 41 deletions(-)
> 
> diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
> index 3ba622702ce4..ec1f0dedb948 100644
> --- a/arch/s390/mm/gmap.c
> +++ b/arch/s390/mm/gmap.c
> @@ -1015,7 +1015,7 @@ static inline void gmap_insert_rmap(struct gmap *sg, 
> unsigned long vmaddr,
>       if (slot) {
>               rmap->next = radix_tree_deref_slot_protected(slot,
>                                                       &sg->guest_table_lock);
> -             radix_tree_replace_slot(slot, rmap);
> +             radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
>       } else {
>               rmap->next = NULL;
>               radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
> diff --git a/drivers/sh/intc/virq.c b/drivers/sh/intc/virq.c
> index e7899624aa0b..35bbe288ddb4 100644
> --- a/drivers/sh/intc/virq.c
> +++ b/drivers/sh/intc/virq.c
> @@ -254,7 +254,7 @@ static void __init intc_subgroup_map(struct intc_desc_int 
> *d)
>  
>               radix_tree_tag_clear(&d->tree, entry->enum_id,
>                                    INTC_TAG_VIRQ_NEEDS_ALLOC);
> -             radix_tree_replace_slot((void **)entries[i],
> +             radix_tree_replace_slot(&d->tree, (void **)entries[i],
>                                       &intc_irq_xlate[irq]);
>       }
>  
> diff --git a/fs/dax.c b/fs/dax.c
> index db78bae0dc0f..85930c2a2749 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -342,7 +342,7 @@ static inline void *lock_slot(struct address_space 
> *mapping, void **slot)
>               radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
>  
>       entry |= RADIX_DAX_ENTRY_LOCK;
> -     radix_tree_replace_slot(slot, (void *)entry);
> +     radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
>       return (void *)entry;
>  }
>  
> @@ -356,7 +356,7 @@ static inline void *unlock_slot(struct address_space 
> *mapping, void **slot)
>               radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
>  
>       entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
> -     radix_tree_replace_slot(slot, (void *)entry);
> +     radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
>       return (void *)entry;
>  }
>  
> diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
> index 7ced8a70cc8b..2d1b9b8be983 100644
> --- a/include/linux/radix-tree.h
> +++ b/include/linux/radix-tree.h
> @@ -249,20 +249,6 @@ static inline int radix_tree_exception(void *arg)
>       return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK);
>  }
>  
> -/**
> - * radix_tree_replace_slot   - replace item in a slot
> - * @pslot:   pointer to slot, returned by radix_tree_lookup_slot
> - * @item:    new item to store in the slot.
> - *
> - * For use with radix_tree_lookup_slot().  Caller must hold tree write locked
> - * across slot lookup and replacement.
> - */
> -static inline void radix_tree_replace_slot(void **pslot, void *item)
> -{
> -     BUG_ON(radix_tree_is_internal_node(item));
> -     rcu_assign_pointer(*pslot, item);
> -}
> -
>  int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
>                       unsigned order, struct radix_tree_node **nodep,
>                       void ***slotp);
> @@ -280,6 +266,8 @@ void **radix_tree_lookup_slot(struct radix_tree_root *, 
> unsigned long);
>  void __radix_tree_replace(struct radix_tree_root *root,
>                         struct radix_tree_node *node,
>                         void **slot, void *item);
> +void radix_tree_replace_slot(struct radix_tree_root *root,
> +                          void **slot, void *item);
>  bool __radix_tree_delete_node(struct radix_tree_root *root,
>                             struct radix_tree_node *node);
>  void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void 
> *);
> diff --git a/lib/radix-tree.c b/lib/radix-tree.c
> index ddc6facbf4da..5cbdd911931e 100644
> --- a/lib/radix-tree.c
> +++ b/lib/radix-tree.c
> @@ -753,19 +753,10 @@ void *radix_tree_lookup(struct radix_tree_root *root, 
> unsigned long index)
>  }
>  EXPORT_SYMBOL(radix_tree_lookup);
>  
> -/**
> - * __radix_tree_replace              - replace item in a slot
> - * @root:    radix tree root
> - * @node:    pointer to tree node
> - * @slot:    pointer to slot in @node
> - * @item:    new item to store in the slot.
> - *
> - * For use with __radix_tree_lookup().  Caller must hold tree write locked
> - * across slot lookup and replacement.
> - */
> -void __radix_tree_replace(struct radix_tree_root *root,
> -                       struct radix_tree_node *node,
> -                       void **slot, void *item)
> +static void replace_slot(struct radix_tree_root *root,
> +                      struct radix_tree_node *node,
> +                      void **slot, void *item,
> +                      bool warn_typeswitch)
>  {
>       void *old = rcu_dereference_raw(*slot);
>       int count, exceptional;
> @@ -776,8 +767,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
>       exceptional = !!radix_tree_exceptional_entry(item) -
>                     !!radix_tree_exceptional_entry(old);
>  
> -     WARN_ONCE(!node && slot != (void **)&root->rnode &&
> -               (count || exceptional),
> +     WARN_ONCE(warn_typeswitch && (count || exceptional),
>                 "Unaccounted slot replacement: old=%p item=%p count=%d 
> exceptional=%d\n",
>                 old, item, count, exceptional);
>  
> @@ -789,6 +779,50 @@ void __radix_tree_replace(struct radix_tree_root *root,
>  }
>  
>  /**
> + * __radix_tree_replace              - replace item in a slot
> + * @root:    radix tree root
> + * @node:    pointer to tree node
> + * @slot:    pointer to slot in @node
> + * @item:    new item to store in the slot.
> + *
> + * For use with __radix_tree_lookup().  Caller must hold tree write locked
> + * across slot lookup and replacement.
> + */
> +void __radix_tree_replace(struct radix_tree_root *root,
> +                       struct radix_tree_node *node,
> +                       void **slot, void *item)
> +{
> +     /*
> +      * This function supports replacing entries with different types
> +      * (NULL, regular entries, exceptional entries), but that needs
> +      * accounting against the node - unless the slot is root->rnode.
> +      */
> +     replace_slot(root, node, slot, item,
> +                  !node && slot != (void **)&root->rnode);
> +}
> +
> +/**
> + * radix_tree_replace_slot   - replace item in a slot
> + * @root:    radix tree root
> + * @slot:    pointer to slot
> + * @item:    new item to store in the slot.
> + *
> + * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(),
> + * radix_tree_gang_lookup_tag_slot().  Caller must hold tree write locked
> + * across slot lookup and replacement.
> + *
> + * NOTE: This cannot be used to switch between non-entries (empty slots),
> + * regular entries, and exceptional entries, as that requires accounting
> + * inside the radix tree node. When switching from one type of entry to
> + * another, use __radix_tree_lookup() and __radix_tree_replace().
> + */
> +void radix_tree_replace_slot(struct radix_tree_root *root,
> +                          void **slot, void *item)
> +{
> +     replace_slot(root, NULL, slot, item, true);
> +}
> +
> +/**
>   *   radix_tree_tag_set - set a tag on a radix tree node
>   *   @root:          radix tree root
>   *   @index:         index key
> diff --git a/mm/filemap.c b/mm/filemap.c
> index c7fe2f16503f..eb463156f29a 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -147,7 +147,7 @@ static int page_cache_tree_insert(struct address_space 
> *mapping,
>                                                     false);
>               }
>       }
> -     radix_tree_replace_slot(slot, page);
> +     radix_tree_replace_slot(&mapping->page_tree, slot, page);
>       mapping->nrpages++;
>       if (node) {
>               workingset_node_pages_inc(node);
> @@ -193,7 +193,7 @@ static void page_cache_tree_delete(struct address_space 
> *mapping,
>                       shadow = NULL;
>               }
>  
> -             radix_tree_replace_slot(slot, shadow);
> +             radix_tree_replace_slot(&mapping->page_tree, slot, shadow);
>  
>               if (!node)
>                       break;
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index eac6f0580e26..fed8d5e96978 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1421,7 +1421,7 @@ static void collapse_shmem(struct mm_struct *mm,
>               list_add_tail(&page->lru, &pagelist);
>  
>               /* Finally, replace with the new page. */
> -             radix_tree_replace_slot(slot,
> +             radix_tree_replace_slot(&mapping->page_tree, slot,
>                               new_page + (index % HPAGE_PMD_NR));
>  
>               index++;
> @@ -1531,7 +1531,8 @@ static void collapse_shmem(struct mm_struct *mm,
>                       /* Unfreeze the page. */
>                       list_del(&page->lru);
>                       page_ref_unfreeze(page, 2);
> -                     radix_tree_replace_slot(slot, page);
> +                     radix_tree_replace_slot(&mapping->page_tree,
> +                                             slot, page);
>                       spin_unlock_irq(&mapping->tree_lock);
>                       putback_lru_page(page);
>                       unlock_page(page);
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 99250aee1ac1..9b88e4e37d0a 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -482,7 +482,7 @@ int migrate_page_move_mapping(struct address_space 
> *mapping,
>               SetPageDirty(newpage);
>       }
>  
> -     radix_tree_replace_slot(pslot, newpage);
> +     radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
>  
>       /*
>        * Drop cache reference from old page by unfreezing
> @@ -556,7 +556,7 @@ int migrate_huge_page_move_mapping(struct address_space 
> *mapping,
>  
>       get_page(newpage);
>  
> -     radix_tree_replace_slot(pslot, newpage);
> +     radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
>  
>       page_ref_unfreeze(page, expected_count - 1);
>  
> diff --git a/mm/truncate.c b/mm/truncate.c
> index a01cce450a26..6ae44571d4c7 100644
> --- a/mm/truncate.c
> +++ b/mm/truncate.c
> @@ -49,7 +49,7 @@ static void clear_exceptional_entry(struct address_space 
> *mapping,
>               goto unlock;
>       if (*slot != entry)
>               goto unlock;
> -     radix_tree_replace_slot(slot, NULL);
> +     radix_tree_replace_slot(&mapping->page_tree, slot, NULL);
>       mapping->nrexceptional--;
>       if (!node)
>               goto unlock;
> diff --git a/tools/testing/radix-tree/multiorder.c 
> b/tools/testing/radix-tree/multiorder.c
> index 05d7bc488971..d1be94667a30 100644
> --- a/tools/testing/radix-tree/multiorder.c
> +++ b/tools/testing/radix-tree/multiorder.c
> @@ -146,7 +146,7 @@ static void multiorder_check(unsigned long index, int 
> order)
>  
>       slot = radix_tree_lookup_slot(&tree, index);
>       free(*slot);
> -     radix_tree_replace_slot(slot, item2);
> +     radix_tree_replace_slot(&tree, slot, item2);
>       for (i = min; i < max; i++) {
>               struct item *item = item_lookup(&tree, i);
>               assert(item != 0);
> -- 
> 2.10.1
> 
-- 
Jan Kara <j...@suse.com>
SUSE Labs, CR

Reply via email to