The only concern with introducing the partial counter is that
partial_free_objs may cause cacheline and atomic operation
contention when concurrent __slab_free() calls hit the same
SLUB node.

This patch converts it to a percpu counter and also moves the
counter fields within kmem_cache_node to avoid cacheline issues.
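
For reference, a minimal kernel-context sketch of the percpu counter
pattern relied on here (illustration only, not part of the patch; the
helper names are made up, and the open-coded loop is the equivalent of
the per_cpu_sum() used in the diff below):

	#include <linux/percpu.h>

	/* Writers touch only their local CPU slot: no shared atomics. */
	static inline void counter_add(unsigned long __percpu *ctr, long delta)
	{
		this_cpu_add(*ctr, delta);
	}

	/*
	 * Readers sum all CPU slots.  Because per-CPU deltas are not
	 * synchronized, the sum can transiently wrap below zero when
	 * interpreted as a signed value, which is why the patch clamps
	 * the result with the "(long)ret < 0" checks.
	 */
	static inline unsigned long counter_sum(unsigned long __percpu *ctr)
	{
		unsigned long sum = 0;
		int cpu;

		for_each_possible_cpu(cpu)
			sum += *per_cpu_ptr(ctr, cpu);

		return sum;
	}
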

Tested-by: James Wang <jnw...@linux.alibaba.com>
Reviewed-by: Pekka Enberg <penb...@kernel.org>
Signed-off-by: Xunlei Pang <xlp...@linux.alibaba.com>
---
 mm/slab.h |  6 ++++--
 mm/slub.c | 30 +++++++++++++++++++++++-------
 2 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/mm/slab.h b/mm/slab.h
index 817bfa0..c819597 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -546,16 +546,18 @@ struct kmem_cache_node {
 
 #ifdef CONFIG_SLUB
        unsigned long nr_partial;
-       struct list_head partial;
 #if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
-       atomic_long_t partial_free_objs;
        unsigned long partial_total_objs;
 #endif
+       struct list_head partial;
 #ifdef CONFIG_SLUB_DEBUG
        atomic_long_t nr_slabs;
        atomic_long_t total_objects;
        struct list_head full;
 #endif
+#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
+       unsigned long __percpu *partial_free_objs;
+#endif
 #endif
 
 };
diff --git a/mm/slub.c b/mm/slub.c
index 3f76b57..b6ec065 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1894,7 +1894,7 @@ static void discard_slab(struct kmem_cache *s, struct page *page)
 static inline void
 __update_partial_free(struct kmem_cache_node *n, long delta)
 {
-       atomic_long_add(delta, &n->partial_free_objs);
+       this_cpu_add(*n->partial_free_objs, delta);
 }
 
 static inline void
@@ -2548,11 +2548,16 @@ static unsigned long partial_counter(struct kmem_cache_node *n,
        unsigned long ret = 0;
 
        if (item == PARTIAL_FREE) {
-               ret = atomic_long_read(&n->partial_free_objs);
+               ret = per_cpu_sum(*n->partial_free_objs);
+               if ((long)ret < 0)
+                       ret = 0;
        } else if (item == PARTIAL_TOTAL) {
                ret = n->partial_total_objs;
        } else if (item == PARTIAL_INUSE) {
-               ret = n->partial_total_objs - atomic_long_read(&n->partial_free_objs);
+               ret = per_cpu_sum(*n->partial_free_objs);
+               if ((long)ret < 0)
+                       ret = 0;
+               ret = n->partial_total_objs - ret;
                if ((long)ret < 0)
                        ret = 0;
        }
@@ -3552,14 +3557,16 @@ static inline int calculate_order(unsigned int size)
        return -ENOSYS;
 }
 
-static void
+static int
 init_kmem_cache_node(struct kmem_cache_node *n)
 {
        n->nr_partial = 0;
        spin_lock_init(&n->list_lock);
        INIT_LIST_HEAD(&n->partial);
 #if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
-       atomic_long_set(&n->partial_free_objs, 0);
+       n->partial_free_objs = alloc_percpu(unsigned long);
+       if (!n->partial_free_objs)
+               return -ENOMEM;
        n->partial_total_objs = 0;
 #endif
 #ifdef CONFIG_SLUB_DEBUG
@@ -3567,6 +3574,8 @@ static inline int calculate_order(unsigned int size)
        atomic_long_set(&n->total_objects, 0);
        INIT_LIST_HEAD(&n->full);
 #endif
+
+       return 0;
 }
 
 static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
@@ -3626,7 +3635,7 @@ static void early_kmem_cache_node_alloc(int node)
        page->inuse = 1;
        page->frozen = 0;
        kmem_cache_node->node[node] = n;
-       init_kmem_cache_node(n);
+       BUG_ON(init_kmem_cache_node(n) < 0);
        inc_slabs_node(kmem_cache_node, node, page->objects);
 
        /*
@@ -3644,6 +3653,9 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
 
        for_each_kmem_cache_node(s, node, n) {
                s->node[node] = NULL;
+#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
+               free_percpu(n->partial_free_objs);
+#endif
                kmem_cache_free(kmem_cache_node, n);
        }
 }
@@ -3674,7 +3686,11 @@ static int init_kmem_cache_nodes(struct kmem_cache *s)
                        return 0;
                }
 
-               init_kmem_cache_node(n);
+               if (init_kmem_cache_node(n) < 0) {
+                       free_kmem_cache_nodes(s);
+                       return 0;
+               }
+
                s->node[node] = n;
        }
        return 1;
-- 
1.8.3.1
