In the file descriptor table duplication code (called at fork()), we
need to duplicate an IDR.  But we have to do it under a lock (so another
thread doesn't open/close a fd in the middle), and there's no suitable
preload operation for this today.  Adding just idr_copy_preload() isn't
enough as another thread could grow the fd table between starting the
preload and acquiring the lock.  We also need idr_check_preload() to be
called after acquiring the lock.

Signed-off-by: Matthew Wilcox <mawil...@microsoft.com>
---
 include/linux/idr.h                         | 32 +++++++++++
 include/linux/radix-tree.h                  |  3 ++
 lib/radix-tree.c                            | 83 +++++++++++++++++++++++++++++
 tools/testing/radix-tree/idr-test.c         | 24 +++++++++
 tools/testing/radix-tree/linux/radix-tree.h |  2 +
 tools/testing/radix-tree/main.c             | 38 +++++++++++++
 6 files changed, 182 insertions(+)

diff --git a/include/linux/idr.h b/include/linux/idr.h
index d43cf01..eed1c1a 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -166,6 +166,38 @@ static inline bool idr_is_empty(const struct idr *idr)
 }
 
 /**
+ * idr_copy_preload - preload for idr_copy()
+ * @src: IDR to be copied from
+ * @gfp: Allocation mask to use for preloading
+ *
+ * Preallocates enough memory for a call to idr_copy().  This function
+ * returns with preemption disabled.  Call idr_preload_end() once the
+ * copy has completed.
+ *
+ * Return: -ENOMEM if the memory could not be allocated.
+ */
+static inline int idr_copy_preload(const struct idr *src, gfp_t gfp)
+{
+       return radix_tree_copy_preload(&src->idr_rt, gfp);
+}
+
+/**
+ * idr_check_preload - Check the preload is still sufficient
+ * @src: IDR to be copied from
+ *
+ * Between the successful allocation of memory and acquiring the lock that
+ * protects @src, the IDR may have expanded.  If this function returns
+ * false, more memory needs to be preallocated.
+ *
+ * Return: true if enough memory remains allocated, false to retry the
+ * preallocation.
+ */
+static inline bool idr_check_preload(const struct idr *src)
+{
+       return radix_tree_check_preload(&src->idr_rt);
+}
+
+/**
  * idr_preload_end - end preload section started with idr_preload()
  *
  * Each idr_preload() should be matched with an invocation of this
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index f701e0b..f53d004 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -354,6 +354,9 @@ static inline void radix_tree_preload_end(void)
        preempt_enable();
 }
 
+int radix_tree_copy_preload(const struct radix_tree_root *, gfp_t);
+bool radix_tree_check_preload(const struct radix_tree_root *);
+
 int radix_tree_split_preload(unsigned old_order, unsigned new_order, gfp_t);
 int radix_tree_split(struct radix_tree_root *, unsigned long index,
                        unsigned new_order);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 855ac8e..c1d75224 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -277,6 +277,55 @@ static unsigned long next_index(unsigned long index,
        return (index & ~node_maxindex(node)) + (offset << node->shift);
 }
 
+/**
+ * radix_tree_count_nodes - Returns the number of nodes in this tree
+ * @root: radix tree root
+ *
+ * This routine does not examine every node in the tree; it assumes that
+ * all entries in the tree at level 1 are nodes.  It will overestimate
+ * the number of nodes in the tree in the presence of entries of order
+ * RADIX_TREE_MAP_SHIFT or higher.
+ */
+static unsigned long radix_tree_count_nodes(const struct radix_tree_root *root)
+{
+       struct radix_tree_node *node, *child;
+       unsigned long n = 1;
+       void *entry = rcu_dereference_raw(root->rnode);
+       unsigned int offset = 0;
+
+       if (!radix_tree_is_internal_node(entry))
+               return 0;
+
+       node = entry_to_node(entry);
+       if (!node->shift)
+               return n;
+
+       n += node->count;
+       if (node->shift == RADIX_TREE_MAP_SHIFT)
+               return n;
+
+       while (node) {
+               if (offset == RADIX_TREE_MAP_SIZE) {
+                       offset = node->offset + 1;
+                       node = node->parent;
+                       continue;
+               }
+
+               entry = rcu_dereference_raw(node->slots[offset]);
+               offset++;
+               if (!radix_tree_is_internal_node(entry))
+                       continue;
+               child = entry_to_node(entry);
+               n += child->count;
+               if (node->shift <= 2 * RADIX_TREE_MAP_SHIFT)
+                       continue;
+               offset = 0;
+               node = child;
+       }
+
+       return n;
+}
+
 #ifndef __KERNEL__
 static void dump_node(struct radix_tree_node *node, unsigned long index)
 {
@@ -530,6 +579,40 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(radix_tree_maybe_preload);
 
+/**
+ * radix_tree_copy_preload - preload for radix_tree_copy()
+ * @src: radix tree root to be copied from
+ * @gfp: Allocation mask to use for preloading
+ *
+ * Preallocates enough memory for a call to radix_tree_copy().  This function
+ * returns with preemption disabled.  Call radix_tree_preload_end() once the
+ * copy has completed.
+ *
+ * Return: -ENOMEM if the memory could not be allocated.
+ */
+int radix_tree_copy_preload(const struct radix_tree_root *src, gfp_t gfp_mask)
+{
+       return __radix_tree_preload(gfp_mask, radix_tree_count_nodes(src));
+}
+
+/**
+ * radix_tree_check_preload - Check the preload is still sufficient
+ * @src: radix tree to be copied from
+ * @cookie: Cookie returned from radix_tree_copy_preload()
+ *
+ * Between the successful allocation of memory and acquiring the lock that
+ * protects @src, the radix tree may have expanded.  Call this function
+ * to see if the preallocation needs to be expanded too.
+ *
+ * Return: true if enough memory remains allocated, false if it does not
+ * suffice.
+ */
+bool radix_tree_check_preload(const struct radix_tree_root *src)
+{
+       struct radix_tree_preload *rtp = this_cpu_ptr(&radix_tree_preloads);
+       return rtp->nr >= radix_tree_count_nodes(src);
+}
+
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
 /*
  * Preload with enough objects to ensure that we can split a single entry
diff --git a/tools/testing/radix-tree/idr-test.c 
b/tools/testing/radix-tree/idr-test.c
index 3f9f429..e8d5386 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -225,6 +225,29 @@ void idr_get_next_test(void)
        idr_destroy(&idr);
 }
 
+void idr_copy_test(void)
+{
+       DEFINE_IDR(src);
+       DEFINE_IDR(dst);
+       int i;
+       void *p;
+
+       for (i = 0; i < 10000; i++) {
+               struct item *item = item_create(i, 0);
+               assert(idr_alloc(&src, item, 0, 20000, GFP_KERNEL) == i);
+       }
+
+       idr_copy_preload(&src, GFP_KERNEL);
+       idr_for_each_entry(&src, p, i) {
+               assert(idr_alloc(&dst, p, i, i + 1, GFP_NOWAIT) == i);
+       }
+       idr_preload_end();
+
+       idr_for_each(&src, item_idr_free, NULL);
+       idr_destroy(&src);
+       idr_destroy(&dst);
+}
+
 void idr_checks(void)
 {
        unsigned long i;
@@ -276,6 +299,7 @@ void idr_checks(void)
        idr_tag_test();
        idr_nowait_test();
        idr_get_next_test();
+       idr_copy_test();
 }
 
 /*
diff --git a/tools/testing/radix-tree/linux/radix-tree.h 
b/tools/testing/radix-tree/linux/radix-tree.h
index bf1bb23..0c5885e 100644
--- a/tools/testing/radix-tree/linux/radix-tree.h
+++ b/tools/testing/radix-tree/linux/radix-tree.h
@@ -4,6 +4,8 @@
 #include "generated/map-shift.h"
 #include "../../../../include/linux/radix-tree.h"
 
+unsigned long radix_tree_count_nodes(const struct radix_tree_root *root);
+
 extern int kmalloc_verbose;
 extern int test_verbose;
 
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index bc9a784..a7504e2 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -11,6 +11,42 @@
 #include "test.h"
 #include "regression.h"
 
+static void count_node_check(void)
+{
+       unsigned long i, j, k;
+       RADIX_TREE(tree, GFP_KERNEL);
+
+       assert(radix_tree_empty(&tree));
+       assert(radix_tree_count_nodes(&tree) == 0);
+       assert(item_insert(&tree, 0) == 0);
+       assert(radix_tree_count_nodes(&tree) == 0);
+
+       for (i = 1; i < RADIX_TREE_MAP_SIZE; i++) {
+               assert(item_insert(&tree, i) == 0);
+               assert(radix_tree_count_nodes(&tree) == 1);
+       }
+
+       j = 3;
+       k = 3;
+       for (i = RADIX_TREE_MAP_SIZE; i < i * RADIX_TREE_MAP_SIZE;
+                       i *= RADIX_TREE_MAP_SIZE) {
+               assert(item_insert(&tree, i) == 0);
+               assert(radix_tree_count_nodes(&tree) == j);
+               j += k;
+               k++;
+       }
+
+       assert(item_insert(&tree, i) == 0);
+       assert(radix_tree_count_nodes(&tree) == j);
+
+       item_kill_tree(&tree);
+}
+
+void basic_checks(void)
+{
+       count_node_check();
+}
+
 void __gang_check(unsigned long middle, long down, long up, int chunk, int hop)
 {
        long idx;
@@ -362,6 +398,8 @@ int main(int argc, char **argv)
        rcu_register_thread();
        radix_tree_init();
 
+       basic_checks();
+
        regression1_test();
        regression2_test();
        regression3_test();
-- 
1.8.3.1

Reply via email to