The extent_state structure is used at the core of the extent I/O code
 for managing flags, locking, etc. It requires allocations deep in the
 write code, and allocation failures there are difficult to recover from.

 We avoid most of these failures by using a mempool, which can sleep when
 required, to satisfy the allocations. This allows future patches to convert
 most of {set,clear,convert}_extent_bit and their derivatives to return
 void.

Signed-off-by: Jeff Mahoney <je...@suse.com>
---
 fs/btrfs/extent_io.c |   71 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 20 deletions(-)
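
 [Editor's note, not part of the commit: for readers less familiar with the
 mempool API, below is a minimal sketch of the pattern the changelog
 describes. The struct/function names (example_state, example_cache,
 example_pool, example_*) are illustrative only; the mempool_*() and
 kmem_cache_*() calls are the same APIs the diff uses.]

 #include <linux/mempool.h>
 #include <linux/slab.h>
 #include <linux/types.h>

 struct example_state {
         u64 start;
         u64 end;
 };

 static struct kmem_cache *example_cache;
 static mempool_t *example_pool;

 static int __init example_init(void)
 {
         example_cache = kmem_cache_create("example_state",
                                           sizeof(struct example_state),
                                           0, 0, NULL);
         if (!example_cache)
                 return -ENOMEM;

         /* Pre-reserve enough slab objects to back roughly 64k of states. */
         example_pool = mempool_create_slab_pool(
                                 (64 * 1024) / sizeof(struct example_state),
                                 example_cache);
         if (!example_pool) {
                 kmem_cache_destroy(example_cache);
                 return -ENOMEM;
         }
         return 0;
 }

 static struct example_state *example_alloc(gfp_t mask)
 {
         /*
          * With a sleeping mask (__GFP_WAIT set), mempool_alloc() waits for
          * an element to be returned to the pool instead of failing, so it
          * cannot return NULL for callers that are allowed to sleep.
          */
         return mempool_alloc(example_pool, mask);
 }

 static void example_free(struct example_state *state)
 {
         /* Hands the element back to the pool (or the slab if the pool is full). */
         mempool_free(state, example_pool);
 }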

Index: source/fs/btrfs/extent_io.c
===================================================================
--- source.orig/fs/btrfs/extent_io.c    2011-11-21 14:13:55.000000000 -0500
+++ source/fs/btrfs/extent_io.c 2011-11-21 14:38:23.000000000 -0500
@@ -12,6 +12,7 @@
 #include <linux/pagevec.h>
 #include <linux/prefetch.h>
 #include <linux/cleancache.h>
+#include <linux/mempool.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "compat.h"
@@ -21,6 +22,8 @@
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
+static mempool_t *extent_state_pool;
+#define EXTENT_STATE_POOL_SIZE (64*1024)
 
 static LIST_HEAD(buffers);
 static LIST_HEAD(states);
@@ -61,18 +64,28 @@ tree_fs_info(struct extent_io_tree *tree
 int __init extent_io_init(void)
 {
        extent_state_cache = kmem_cache_create("extent_state",
-                       sizeof(struct extent_state), 0,
-                       SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+                                       sizeof(struct extent_state), 0,
+                                       SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+                                       NULL);
        if (!extent_state_cache)
                return -ENOMEM;
 
+       extent_state_pool = mempool_create_slab_pool(
+                                               EXTENT_STATE_POOL_SIZE /
+                                               sizeof(struct extent_state),
+                                               extent_state_cache);
+       if (!extent_state_pool)
+               goto free_state_cache;
+
        extent_buffer_cache = kmem_cache_create("extent_buffers",
                        sizeof(struct extent_buffer), 0,
                        SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!extent_buffer_cache)
-               goto free_state_cache;
+               goto free_state_mempool;
        return 0;
 
+free_state_mempool:
+       mempool_destroy(extent_state_pool);
 free_state_cache:
        kmem_cache_destroy(extent_state_cache);
        return -ENOMEM;
@@ -103,6 +116,8 @@ void extent_io_exit(void)
                list_del(&eb->leak_list);
                kmem_cache_free(extent_buffer_cache, eb);
        }
+       if (extent_state_pool)
+               mempool_destroy(extent_state_pool);
        if (extent_state_cache)
                kmem_cache_destroy(extent_state_cache);
        if (extent_buffer_cache)
@@ -128,7 +143,7 @@ static struct extent_state *alloc_extent
        unsigned long flags;
 #endif
 
-       state = kmem_cache_alloc(extent_state_cache, mask);
+       state = mempool_alloc(extent_state_pool, mask);
        if (!state)
                return state;
        state->state = 0;
@@ -145,6 +160,12 @@ static struct extent_state *alloc_extent
        return state;
 }
 
+static struct extent_state *alloc_extent_state_nofail(gfp_t mask)
+{
+       BUG_ON(!(mask & __GFP_WAIT));
+       return alloc_extent_state(mask);
+}
+
 void free_extent_state(struct extent_state *state)
 {
        if (!state)
@@ -160,7 +181,7 @@ void free_extent_state(struct extent_sta
                spin_unlock_irqrestore(&leak_lock, flags);
 #endif
                trace_free_extent_state(state, _RET_IP_);
-               kmem_cache_free(extent_state_cache, state);
+               mempool_free(state, extent_state_pool);
        }
 }
 
@@ -437,6 +458,12 @@ static int clear_state_bit(struct extent
        return ret;
 }
 
+static void
+assert_atomic_alloc(struct extent_state *prealloc, gfp_t mask)
+{
+       WARN_ON(!prealloc && (mask & __GFP_WAIT));
+}
+
 static struct extent_state *
 alloc_extent_state_atomic(struct extent_state *prealloc)
 {
@@ -464,6 +491,7 @@ NORET_TYPE void extent_io_tree_panic(str
  * the range [start, end] is inclusive.
  *
  * This takes the tree lock, and returns 0 on success and < 0 on error.
+ * If (mask & __GFP_WAIT) == 0, there are no error conditions.
  */
 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                     int bits, int wake, int delete,
@@ -486,11 +514,8 @@ int clear_extent_bit(struct extent_io_tr
        if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
                clear = 1;
 again:
-       if (!prealloc && (mask & __GFP_WAIT)) {
-               prealloc = alloc_extent_state(mask);
-               if (!prealloc)
-                       return -ENOMEM;
-       }
+       if (!prealloc && (mask & __GFP_WAIT))
+               prealloc = alloc_extent_state_nofail(mask);
 
        spin_lock(&tree->lock);
        if (cached_state) {
@@ -542,6 +567,7 @@ hit_next:
         */
 
        if (state->start < start) {
+               assert_atomic_alloc(prealloc, mask);
                prealloc = alloc_extent_state_atomic(prealloc);
                BUG_ON(!prealloc);
                err = split_state(tree, state, prealloc, start);
@@ -566,6 +592,7 @@ hit_next:
         * on the first half
         */
        if (state->start <= end && state->end > end) {
+               assert_atomic_alloc(prealloc, mask);
                prealloc = alloc_extent_state_atomic(prealloc);
                BUG_ON(!prealloc);
                err = split_state(tree, state, prealloc, end + 1);
@@ -726,15 +753,14 @@ int set_extent_bit(struct extent_io_tree
        struct extent_state *prealloc = NULL;
        struct rb_node *node;
        int err = 0;
+       int wait = mask & __GFP_WAIT;
        u64 last_start;
        u64 last_end;
 
        bits |= EXTENT_FIRST_DELALLOC;
 again:
-       if (!prealloc && (mask & __GFP_WAIT)) {
-               prealloc = alloc_extent_state(mask);
-               BUG_ON(!prealloc);
-       }
+       if (!prealloc && wait)
+               prealloc = alloc_extent_state_nofail(mask);
 
        spin_lock(&tree->lock);
        if (cached_state && *cached_state) {
@@ -751,6 +777,7 @@ again:
         */
        node = tree_search(tree, start);
        if (!node) {
+               assert_atomic_alloc(prealloc, mask);
                prealloc = alloc_extent_state_atomic(prealloc);
                BUG_ON(!prealloc);
                err = insert_state(tree, prealloc, start, end, &bits);
@@ -820,6 +847,7 @@ hit_next:
                        goto out;
                }
 
+               assert_atomic_alloc(prealloc, mask);
                prealloc = alloc_extent_state_atomic(prealloc);
                BUG_ON(!prealloc);
                err = split_state(tree, state, prealloc, start);
@@ -853,6 +881,7 @@ hit_next:
                else
                        this_end = last_start - 1;
 
+               assert_atomic_alloc(prealloc, mask);
                prealloc = alloc_extent_state_atomic(prealloc);
                BUG_ON(!prealloc);
 
@@ -883,6 +912,7 @@ hit_next:
                        goto out;
                }
 
+               assert_atomic_alloc(prealloc, mask);
                prealloc = alloc_extent_state_atomic(prealloc);
                BUG_ON(!prealloc);
                err = split_state(tree, state, prealloc, end + 1);
@@ -909,7 +939,7 @@ search_again:
        if (start > end)
                goto out;
        spin_unlock(&tree->lock);
-       if (mask & __GFP_WAIT)
+       if (wait)
                cond_resched();
        goto again;
 }
@@ -940,11 +970,8 @@ int convert_extent_bit(struct extent_io_
        u64 last_end;
 
 again:
-       if (!prealloc && (mask & __GFP_WAIT)) {
-               prealloc = alloc_extent_state(mask);
-               if (!prealloc)
-                       return -ENOMEM;
-       }
+       if (!prealloc && (mask & __GFP_WAIT))
+               prealloc = alloc_extent_state_nofail(mask);
 
        spin_lock(&tree->lock);
        /*
@@ -953,6 +980,7 @@ again:
         */
        node = tree_search(tree, start);
        if (!node) {
+               assert_atomic_alloc(prealloc, mask);
                prealloc = alloc_extent_state_atomic(prealloc);
                if (!prealloc)
                        return -ENOMEM;
@@ -1010,6 +1038,7 @@ hit_next:
         * desired bit on it.
         */
        if (state->start < start) {
+               assert_atomic_alloc(prealloc, mask);
                prealloc = alloc_extent_state_atomic(prealloc);
                if (!prealloc)
                        return -ENOMEM;
@@ -1042,6 +1071,7 @@ hit_next:
                else
                        this_end = last_start - 1;
 
+               assert_atomic_alloc(prealloc, mask);
                prealloc = alloc_extent_state_atomic(prealloc);
                if (!prealloc)
                        return -ENOMEM;
@@ -1069,6 +1099,7 @@ hit_next:
         * on the first half
         */
        if (state->start <= end && state->end > end) {
+               assert_atomic_alloc(prealloc, mask);
                prealloc = alloc_extent_state_atomic(prealloc);
                if (!prealloc)
                        return -ENOMEM;


