Andrey, I'm waiting for your approval here.
On 8/20/20 5:51 PM, Valeriy Vdovin wrote: > Bug https://jira.sw.ru/browse/PSBM-99181 has introduced a problem: when > the kernel has opened NFS delegations and NFS server is not accessible > at the time when NFS shrinker is called, the whole shrinker list > execution gets stuck until NFS server is back. Being a problem in itself > it also introduces bigger problem - during that hang, the shrinker_rwsem > also gets locked, consequently no new mounts can be done at that time > because new superblock tries to register it's own shrinker and also gets > stuck at aquiring shrinker_rwsem. > > Commit 9e9e35d050955648449498827deb2d43be0564e1 is a workaround for that > problem. It is known that during signle shrinker execution we do not > actually need to hold shrinker_rwsem so we release and reacqiure the > rwsem for each shrinker in the list. > > Because of this workaround shrink_slab function now experiences a major > slowdown, because shrinker_rwsem gets accessed for each shrinker in the > list twice. On an idle fresh-booted system shrinker_list could be > iterated up to 1600 times a second, although originally the problem was > local to only one NFS shrinker. > > This patch fixes commit 9e9e35d050955648449498827deb2d43be0564e1 in a > way that before calling for up_read for shrinker_rwsem, we check that > this is really an NFS shrinker by checking NFS magic in superblock, if > it is accessible from shrinker. > > https://jira.sw.ru/browse/PSBM-99181 > > Co-authored-by: Andrey Ryabinin <aryabi...@virtuozzo.com> > Signed-off-by: Valeriy Vdovin <valeriy.vdo...@virtuozzo.com> > > Changes: > v2: Added missing 'rwsem_is_contented' check > --- > fs/super.c | 2 +- > mm/vmscan.c | 65 > ++++++++++++++++++++++++++++++++++++++++++++++--------------- > 2 files changed, 50 insertions(+), 17 deletions(-) > > diff --git a/fs/super.c b/fs/super.c > index f131d14..1cf377a 100644 > --- a/fs/super.c > +++ b/fs/super.c > @@ -80,7 +80,7 @@ EXPORT_SYMBOL(dcache_is_low); > * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we > * take a passive reference to the superblock to avoid this from occurring. > */ > -static unsigned long super_cache_scan(struct shrinker *shrink, > +unsigned long super_cache_scan(struct shrinker *shrink, > struct shrink_control *sc) > { > struct super_block *sb; > diff --git a/mm/vmscan.c b/mm/vmscan.c > index d7082d2..4fa86e7 100644 > --- a/mm/vmscan.c > +++ b/mm/vmscan.c > @@ -453,6 +453,20 @@ static unsigned long do_shrink_slab(struct > shrink_control *shrinkctl, > return freed; > } > > +unsigned long super_cache_scan(struct shrinker *shrink, > + struct shrink_control *sc); > + > +static inline bool is_nfs_shrinker(struct shrinker *shrinker) > +{ > + struct super_block *sb = container_of(shrinker, > + struct super_block, s_shrink); > + > + if (shrinker->scan_objects == &super_cache_scan) > + return sb->s_magic == NFS_SUPER_MAGIC; > + > + return false; > +} > + > struct shrinker *get_shrinker(struct shrinker *shrinker) > { > /* > @@ -511,6 +525,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, > int nid, > .memcg = memcg, > }; > struct shrinker *shrinker; > + bool is_nfs; > > shrinker = idr_find(&shrinker_idr, i); > if (unlikely(!shrinker)) { > @@ -518,6 +533,8 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, > int nid, > continue; > } > > + is_nfs = is_nfs_shrinker(shrinker); > + > /* > * Take a refcnt on a shrinker so that it can't be freed or > * removed from shrinker_idr (and shrinker_list). These way we > @@ -527,10 +544,16 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, > int nid, > * take too much time to finish (e.g. on nfs). And holding > * global shrinker_rwsem can block registring and unregistring > * of shrinkers. > + * > + * The up_read logic should only be executed for nfs shrinker > + * path, because it has proven to hang. For others it should be > + * skipped to reduce performance penalties. > */ > - if(!get_shrinker(shrinker)) > - continue; > - up_read(&shrinker_rwsem); > + if (is_nfs) { > + if (!get_shrinker(shrinker)) > + continue; > + up_read(&shrinker_rwsem); > + } > > ret = do_shrink_slab(&sc, shrinker, priority); > if (ret == SHRINK_EMPTY) { > @@ -565,14 +588,18 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, > int nid, > * memcg_expand_one_shrinker_map if new shrinkers > * were registred in the meanwhile. > */ > - if (!down_read_trylock(&shrinker_rwsem)) { > - freed = freed ? : 1; > + if (is_nfs) { > + if (!down_read_trylock(&shrinker_rwsem)) { > + freed = freed ? : 1; > + put_shrinker(shrinker); > + return freed; > + } > put_shrinker(shrinker); > - return freed; > + map = memcg_nid_shrinker_map(memcg, nid); > + nr_max = min(shrinker_nr_max, map->nr_max); > + } else if (rwsem_is_contended(&shrinker_rwsem)) { > + break; > } > - put_shrinker(shrinker); > - map = memcg_nid_shrinker_map(memcg, nid); > - nr_max = min(shrinker_nr_max, map->nr_max); > } > unlock: > up_read(&shrinker_rwsem); > @@ -648,13 +675,17 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int > nid, > .for_drop_caches = for_drop_caches, > }; > > + bool is_nfs = is_nfs_shrinker(shrinker); > + > if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) > sc.nid = 0; > > /* See comment in shrink_slab_memcg. */ > - if(!get_shrinker(shrinker)) > - continue; > - up_read(&shrinker_rwsem); > + if (is_nfs) { > + if (!get_shrinker(shrinker)) > + continue; > + up_read(&shrinker_rwsem); > + } > > ret = do_shrink_slab(&sc, shrinker, priority); > if (ret == SHRINK_EMPTY) > @@ -665,12 +696,14 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int > nid, > * We can safely continue traverse of the shrinker_list as > * our shrinker is still on the list due to refcount. > */ > - if (!down_read_trylock(&shrinker_rwsem)) { > - freed = freed ? : 1; > + if (is_nfs) { > + if (!down_read_trylock(&shrinker_rwsem)) { > + freed = freed ? : 1; > + put_shrinker(shrinker); > + goto out; > + } > put_shrinker(shrinker); > - goto out; > } > - put_shrinker(shrinker); > } > > up_read(&shrinker_rwsem); > _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel