Re: [f2fs-dev] [PATCH v4 46/48] mm: shrinker: make memcg slab shrink lockless

2023-08-08 Thread Qi Zheng via Linux-f2fs-devel

Hi Dave,

On 2023/8/8 10:44, Dave Chinner wrote:

On Mon, Aug 07, 2023 at 07:09:34PM +0800, Qi Zheng wrote:

Like global slab shrink, this commit also uses refcount+RCU method to make
memcg slab shrink lockless.


This patch does random code cleanups amongst the actual RCU changes.
Can you please move the cleanups to a separate patch to reduce the
noise in this one?


Sure, will do.




diff --git a/mm/shrinker.c b/mm/shrinker.c
index d318f5621862..fee6f62904fb 100644
--- a/mm/shrinker.c
+++ b/mm/shrinker.c
@@ -107,6 +107,12 @@ static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
 					  lockdep_is_held(&shrinker_rwsem));
 }
 
+static struct shrinker_info *shrinker_info_rcu(struct mem_cgroup *memcg,
+					       int nid)
+{
+	return rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
+}


This helper doesn't add value. It doesn't tell me that
rcu_read_lock() needs to be held when it is called, for one.


How about adding a comment or an assertion here?
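
For example, something like this minimal sketch (the explicit
RCU_LOCKDEP_WARN() is just an illustration; rcu_dereference() itself
already complains under lockdep when called outside an RCU read-side
critical section):

```
static struct shrinker_info *shrinker_info_rcu(struct mem_cgroup *memcg,
					       int nid)
{
	/* Caller must hold rcu_read_lock() until it is done with the info */
	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
			 "shrinker_info_rcu() called without RCU read lock");
	return rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
}
```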




 static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size,
 				    int old_size, int new_nr_max)
 {
@@ -198,7 +204,7 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
 		struct shrinker_info_unit *unit;
 
 		rcu_read_lock();
-		info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
+		info = shrinker_info_rcu(memcg, nid);


... whilst the original code here was obviously correct.


 		unit = info->unit[shriner_id_to_index(shrinker_id)];
 		if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
 			/* Pairs with smp mb in shrink_slab() */
@@ -211,7 +217,7 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
 
 static DEFINE_IDR(shrinker_idr);
 
-static int prealloc_memcg_shrinker(struct shrinker *shrinker)
+static int shrinker_memcg_alloc(struct shrinker *shrinker)


Cleanups in a separate patch.


OK.




@@ -253,10 +258,15 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
 {
 	struct shrinker_info *info;
 	struct shrinker_info_unit *unit;
+	long nr_deferred;
 
-	info = shrinker_info_protected(memcg, nid);
+	rcu_read_lock();
+	info = shrinker_info_rcu(memcg, nid);
 	unit = info->unit[shriner_id_to_index(shrinker->id)];
-	return atomic_long_xchg(&unit->nr_deferred[shriner_id_to_offset(shrinker->id)], 0);
+	nr_deferred = atomic_long_xchg(&unit->nr_deferred[shriner_id_to_offset(shrinker->id)], 0);
+	rcu_read_unlock();
+
+	return nr_deferred;
 }


This adds two rcu_read_lock() sections to every call to
do_shrink_slab(). It's not at all clear from any of the other code
that do_shrink_slab() now has internal rcu_read_lock() sections.


The xchg_nr_deferred_memcg() will only be called from
shrink_slab_memcg(), so does other code really need to know that
information?
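
To spell that out, the call chain is roughly (an orientation sketch
of the existing code, not new code):

```
shrink_slab_memcg()
  -> do_shrink_slab()
       -> xchg_nr_deferred()
            -> xchg_nr_deferred_memcg()	/* internal RCU section */
       -> add_nr_deferred()
            -> add_nr_deferred_memcg()	/* internal RCU section */
```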




@@ -464,18 +480,23 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 	if (!mem_cgroup_online(memcg))
 		return 0;
 
-	if (!down_read_trylock(&shrinker_rwsem))
-		return 0;
-
-	info = shrinker_info_protected(memcg, nid);
+again:
+	rcu_read_lock();
+	info = shrinker_info_rcu(memcg, nid);
 	if (unlikely(!info))
 		goto unlock;
  
-	for (; index < shriner_id_to_index(info->map_nr_max); index++) {
+	if (index < shriner_id_to_index(info->map_nr_max)) {
 		struct shrinker_info_unit *unit;
 
 		unit = info->unit[index];
 
+		/*
+		 * The shrinker_info_unit will not be freed, so we can
+		 * safely release the RCU lock here.
+		 */
+		rcu_read_unlock();


Why - what guarantees that the shrinker_info_unit exists at this
point? We hold no reference to it, we hold no reference to any
shrinker, etc. What provides this existence guarantee?


The shrinker_info_unit is never freed unless the memcg is destroyed.
Here we hold a reference to this memcg (taken via mem_cgroup_iter()
--> css_tryget()), so the shrinker_info_unit will not be freed.
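
Roughly, the pinning works like this (a sketch of the existing
shrink_slab() iteration, for illustration only):

```
/*
 * mem_cgroup_iter() takes a css reference (css_tryget()) on each
 * memcg it returns and only drops it when advancing to the next
 * one, so the memcg -- and its shrinker_info_unit, which is only
 * freed when the memcg itself is destroyed -- stays alive across
 * the whole shrink_slab_memcg() call:
 */
memcg = mem_cgroup_iter(target_memcg, NULL, NULL);
do {
	freed += shrink_slab_memcg(gfp_mask, nid, memcg, priority);
} while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)) != NULL);
```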




+
 		for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) {
 			struct shrink_control sc = {
 				.gfp_mask = gfp_mask,
@@ -485,12 +506,14 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 			struct shrinker *shrinker;
 			int shrinker_id = calc_shrinker_id(index, offset);
 
+			rcu_read_lock();
 			shrinker = idr_find(&shrinker_idr, shrinker_id);
-			if (unlikely(!shrinker || !(shrinker->flags & SHRINKER_REGISTERED))) {
-				if (!shrinker)
-					clear_bit(offset, unit->map);
+			if (unlikely(!shrinker || !shrinker_try_get(shrinker))) {
+				clear_bit(offset, unit->map);
+				rcu_read_unlock();
 				continue;
 			}
+			rcu_read_unlock();

Re: [f2fs-dev] [PATCH v4 46/48] mm: shrinker: make memcg slab shrink lockless

2023-08-07 Thread Dave Chinner via Linux-f2fs-devel
On Mon, Aug 07, 2023 at 07:09:34PM +0800, Qi Zheng wrote:
> Like global slab shrink, this commit also uses refcount+RCU method to make
> memcg slab shrink lockless.

This patch does random code cleanups amongst the actual RCU changes.
Can you please move the cleanups to a separate patch to reduce the
noise in this one?

> diff --git a/mm/shrinker.c b/mm/shrinker.c
> index d318f5621862..fee6f62904fb 100644
> --- a/mm/shrinker.c
> +++ b/mm/shrinker.c
> @@ -107,6 +107,12 @@ static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
>  					  lockdep_is_held(&shrinker_rwsem));
>  }
>  
> +static struct shrinker_info *shrinker_info_rcu(struct mem_cgroup *memcg,
> +					       int nid)
> +{
> +	return rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
> +}

This helper doesn't add value. It doesn't tell me that
rcu_read_lock() needs to be held when it is called, for one.

>  static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size,
>  				    int old_size, int new_nr_max)
>  {
> @@ -198,7 +204,7 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
>  		struct shrinker_info_unit *unit;
>  
>  		rcu_read_lock();
> -		info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
> +		info = shrinker_info_rcu(memcg, nid);

... whilst the original code here was obviously correct.

>  		unit = info->unit[shriner_id_to_index(shrinker_id)];
>  		if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
>  			/* Pairs with smp mb in shrink_slab() */
> @@ -211,7 +217,7 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
>  
>  static DEFINE_IDR(shrinker_idr);
>  
> -static int prealloc_memcg_shrinker(struct shrinker *shrinker)
> +static int shrinker_memcg_alloc(struct shrinker *shrinker)

Cleanups in a separate patch.

> @@ -253,10 +258,15 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
>  {
>  	struct shrinker_info *info;
>  	struct shrinker_info_unit *unit;
> +	long nr_deferred;
>  
> -	info = shrinker_info_protected(memcg, nid);
> +	rcu_read_lock();
> +	info = shrinker_info_rcu(memcg, nid);
>  	unit = info->unit[shriner_id_to_index(shrinker->id)];
> -	return atomic_long_xchg(&unit->nr_deferred[shriner_id_to_offset(shrinker->id)], 0);
> +	nr_deferred = atomic_long_xchg(&unit->nr_deferred[shriner_id_to_offset(shrinker->id)], 0);
> +	rcu_read_unlock();
> +
> +	return nr_deferred;
>  }

This adds two rcu_read_lock() sections to every call to
do_shrink_slab(). It's not at all clear from any of the other code
that do_shrink_slab() now has internal rcu_read_lock() sections.

> @@ -464,18 +480,23 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
>  	if (!mem_cgroup_online(memcg))
>  		return 0;
>  
> -	if (!down_read_trylock(&shrinker_rwsem))
> -		return 0;
> -
> -	info = shrinker_info_protected(memcg, nid);
> +again:
> +	rcu_read_lock();
> +	info = shrinker_info_rcu(memcg, nid);
>  	if (unlikely(!info))
>  		goto unlock;
>  
> -	for (; index < shriner_id_to_index(info->map_nr_max); index++) {
> +	if (index < shriner_id_to_index(info->map_nr_max)) {
>  		struct shrinker_info_unit *unit;
>  
>  		unit = info->unit[index];
>  
> +		/*
> +		 * The shrinker_info_unit will not be freed, so we can
> +		 * safely release the RCU lock here.
> +		 */
> +		rcu_read_unlock();

Why - what guarantees that the shrinker_info_unit exists at this
point? We hold no reference to it, we hold no reference to any
shrinker, etc. What provides this existence guarantee?

> +
>  		for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) {
>  			struct shrink_control sc = {
>  				.gfp_mask = gfp_mask,
> @@ -485,12 +506,14 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
>  			struct shrinker *shrinker;
>  			int shrinker_id = calc_shrinker_id(index, offset);
>  
> +			rcu_read_lock();
>  			shrinker = idr_find(&shrinker_idr, shrinker_id);
> -			if (unlikely(!shrinker || !(shrinker->flags & SHRINKER_REGISTERED))) {
> -				if (!shrinker)
> -					clear_bit(offset, unit->map);
> +			if (unlikely(!shrinker || !shrinker_try_get(shrinker))) {
> +				clear_bit(offset, unit->map);
> +				rcu_read_unlock();
>  				continue;
>  			}
> +			rcu_read_unlock();
>  
>  			/* Call non-slab shrinkers even though kmem is disabled */

[f2fs-dev] [PATCH v4 46/48] mm: shrinker: make memcg slab shrink lockless

2023-08-07 Thread Qi Zheng via Linux-f2fs-devel
Like global slab shrink, this commit also uses refcount+RCU method to make
memcg slab shrink lockless.
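
The lookup pattern this boils down to is roughly the following (a
condensed sketch of the shrink_slab_memcg() hunk below, with error
paths trimmed; shrinker_try_get()/shrinker_put() are introduced by
earlier patches in this series):

```
	rcu_read_lock();
	shrinker = idr_find(&shrinker_idr, shrinker_id);
	if (unlikely(!shrinker || !shrinker_try_get(shrinker))) {
		/* unregistered or being torn down: skip this id */
		clear_bit(offset, unit->map);
		rcu_read_unlock();
		continue;
	}
	/* the refcount now pins the shrinker, so drop RCU before shrinking */
	rcu_read_unlock();

	ret = do_shrink_slab(&sc, shrinker, priority);
	shrinker_put(shrinker);		/* last ref frees the shrinker via RCU */
```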

Use the following script to do slab shrink stress test:

```

DIR="/root/shrinker/memcg/mnt"

do_create()
{
	mkdir -p /sys/fs/cgroup/memory/test
	echo 4G > /sys/fs/cgroup/memory/test/memory.limit_in_bytes
	for i in `seq 0 $1`;
	do
		mkdir -p /sys/fs/cgroup/memory/test/$i;
		echo $$ > /sys/fs/cgroup/memory/test/$i/cgroup.procs;
		mkdir -p $DIR/$i;
	done
}

do_mount()
{
	for i in `seq $1 $2`;
	do
		mount -t tmpfs $i $DIR/$i;
	done
}

do_touch()
{
	for i in `seq $1 $2`;
	do
		echo $$ > /sys/fs/cgroup/memory/test/$i/cgroup.procs;
		dd if=/dev/zero of=$DIR/$i/file$i bs=1M count=1 &
	done
}

case "$1" in
  touch)
	do_touch $2 $3
	;;
  test)
	do_create 4000
	do_mount 0 4000
	do_touch 0 3000
	;;
  *)
	exit 1
	;;
esac
```

Save the above script, then run the test and touch commands. Then we
can use the following perf command to view hotspots:

perf top -U -F 999

1) Before applying this patchset:

  40.44%  [kernel]   [k] down_read_trylock
  17.59%  [kernel]   [k] up_read
  13.64%  [kernel]   [k] pv_native_safe_halt
  11.90%  [kernel]   [k] shrink_slab
   8.21%  [kernel]   [k] idr_find
   2.71%  [kernel]   [k] _find_next_bit
   1.36%  [kernel]   [k] shrink_node
   0.81%  [kernel]   [k] shrink_lruvec
   0.80%  [kernel]   [k] __radix_tree_lookup
   0.50%  [kernel]   [k] do_shrink_slab
   0.21%  [kernel]   [k] list_lru_count_one
   0.16%  [kernel]   [k] mem_cgroup_iter

2) After applying this patchset:

  60.17%  [kernel]   [k] shrink_slab
  20.42%  [kernel]   [k] pv_native_safe_halt
   3.03%  [kernel]   [k] do_shrink_slab
   2.73%  [kernel]   [k] shrink_node
   2.27%  [kernel]   [k] shrink_lruvec
   2.00%  [kernel]   [k] __rcu_read_unlock
   1.92%  [kernel]   [k] mem_cgroup_iter
   0.98%  [kernel]   [k] __rcu_read_lock
   0.91%  [kernel]   [k] osq_lock
   0.63%  [kernel]   [k] mem_cgroup_calculate_protection
   0.55%  [kernel]   [k] shrinker_put
   0.46%  [kernel]   [k] list_lru_count_one

We can see that the first perf hotspot becomes shrink_slab, which is what
we expect.

Signed-off-by: Qi Zheng 
---
 mm/shrinker.c | 80 ++-
 1 file changed, 54 insertions(+), 26 deletions(-)

diff --git a/mm/shrinker.c b/mm/shrinker.c
index d318f5621862..fee6f62904fb 100644
--- a/mm/shrinker.c
+++ b/mm/shrinker.c
@@ -107,6 +107,12 @@ static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
 					  lockdep_is_held(&shrinker_rwsem));
 }
 
+static struct shrinker_info *shrinker_info_rcu(struct mem_cgroup *memcg,
+					       int nid)
+{
+	return rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
+}
+
 static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size,
 				    int old_size, int new_nr_max)
 {
@@ -198,7 +204,7 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
 		struct shrinker_info_unit *unit;
 
 		rcu_read_lock();
-		info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
+		info = shrinker_info_rcu(memcg, nid);
 		unit = info->unit[shriner_id_to_index(shrinker_id)];
 		if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
 			/* Pairs with smp mb in shrink_slab() */
@@ -211,7 +217,7 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
 
 static DEFINE_IDR(shrinker_idr);
 
-static int prealloc_memcg_shrinker(struct shrinker *shrinker)
+static int shrinker_memcg_alloc(struct shrinker *shrinker)
 {
 	int id, ret = -ENOMEM;
 
@@ -219,7 +225,6 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
 		return -ENOSYS;
 
 	down_write(&shrinker_rwsem);
-	/* This may call shrinker, so it must use down_read_trylock() */
 	id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
 	if (id < 0)
 		goto unlock;
@@ -237,7 +242,7 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
 	return ret;
 }
 
-static void unregister_memcg_shrinker(struct shrinker *shrinker)
+static void shrinker_memcg_remove(struct shrinker *shrinker)
 {
 	int id = shrinker->id;
 
@@ -253,10 +258,15 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
 {
 	struct shrinker_info *info;
 	struct shrinker_info_unit *unit;
+	long nr_deferred;
 
-	info = shrinker_info_protected(memcg, nid);
+	rcu_read_lock();
+	info = shrinker_info_rcu(memcg, nid);
 	unit = info->unit[shriner_id_to_index(shrinker->id)];