On Mon, 10 Aug 2020, Xunlei Pang wrote:

>
> diff --git a/mm/slab.h b/mm/slab.h
> index c85e2fa..a709a70 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -616,7 +616,7 @@ struct kmem_cache_node {
>  #ifdef CONFIG_SLUB
>       unsigned long nr_partial;
>       struct list_head partial;
> -     atomic_long_t partial_free_objs;
> +     atomic_long_t __percpu *partial_free_objs;

A percpu counter never needs to be atomic: each cpu only updates its own
instance. Just use a plain unsigned long and this_cpu operations for this
thing. That should cut down further on the overhead.
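
Something like this (untested sketch; the field layout follows your patch,
the alloc_percpu() placement is only to illustrate):

	struct kmem_cache_node {
	#ifdef CONFIG_SLUB
		unsigned long nr_partial;
		struct list_head partial;
		/* plain per-cpu counter, updated with this_cpu ops */
		unsigned long __percpu *partial_free_objs;
	#endif
	};

	/* at node init time, for example: */
	n->partial_free_objs = alloc_percpu(unsigned long);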

> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1775,11 +1775,21 @@ static void discard_slab(struct kmem_cache *s, struct page *page)
>  /*
>   * Management of partially allocated slabs.
>   */
> +static inline long get_partial_free(struct kmem_cache_node *n)
> +{
> +     long nr = 0;
> +     int cpu;
> +
> +     for_each_possible_cpu(cpu)
> +             nr += atomic_long_read(per_cpu_ptr(n->partial_free_objs, cpu));

this_cpu_read(*n->partial_free_objs)

>  static inline void
>  __update_partial_free(struct kmem_cache_node *n, long delta)
>  {
> -     atomic_long_add(delta, &n->partial_free_objs);
> +     atomic_long_add(delta, this_cpu_ptr(n->partial_free_objs));

this_cpu_add()

and so on.
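
I.e. something like this (untested sketch, keeping your helper names; for
the cross-cpu sum a plain per_cpu_ptr() dereference is enough, no atomics
needed):

	static inline void
	__update_partial_free(struct kmem_cache_node *n, long delta)
	{
		this_cpu_add(*n->partial_free_objs, delta);
	}

	static inline long get_partial_free(struct kmem_cache_node *n)
	{
		long nr = 0;
		int cpu;

		/*
		 * Unlocked sum over all cpus; the total may be slightly
		 * stale, which is fine for a statistic.
		 */
		for_each_possible_cpu(cpu)
			nr += *per_cpu_ptr(n->partial_free_objs, cpu);

		return nr;
	}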
