[Devel] [PATCH RHEL7 COMMIT] mm: Fix rwsem_is_contended conditional code in shrink_slab_memcg

2020-08-21 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.vz7.163.5
-->
commit 41057a6cdd8f0c3d4c8bf9cb4ad5254b62127b98
Author: Valeriy Vdovin 
Date:   Fri Aug 21 17:41:44 2020 +0300

mm: Fix rwsem_is_contended conditional code in shrink_slab_memcg

Fixes commit 38afbd5ecdd6 ("Reduce access frequency to shrinker_rwsem
during shrink_slab") by restoring a line that it dropped from
shrink_slab_memcg.

https://jira.sw.ru/browse/PSBM-99181

Signed-off-by: Valeriy Vdovin 
---
 mm/vmscan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4fa86e7..13ae9bd 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -598,6 +598,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
map = memcg_nid_shrinker_map(memcg, nid);
nr_max = min(shrinker_nr_max, map->nr_max);
} else if (rwsem_is_contended(&shrinker_rwsem)) {
+   freed = freed ? : 1;
break;
}
}
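
The restored line uses the GNU C '?:' shorthand: when the walk is cut short
because another task is waiting on shrinker_rwsem, the function still reports
at least one freed object, presumably so that callers do not read a zero
return as "nothing left to shrink" and give up. A minimal illustration of the
operator itself (not part of the patch):

    freed = freed ? : 1;    /* equivalent to: freed = freed ? freed : 1; */
                            /* a non-zero count is kept, a zero becomes 1 */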


[Devel] [PATCH RHEL7] mm: Fix rwsem_is_contended conditional code in shrink_slab_memcg

2020-08-21 Thread Valeriy Vdovin
Fixes commit 38afbd5ecdd6841b5e486e3c9dae05d961f084b5 by restoring a
line that it dropped from shrink_slab_memcg.

https://jira.sw.ru/browse/PSBM-99181

Signed-off-by: Valeriy Vdovin 
---
 mm/vmscan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4fa86e7..13ae9bd 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -598,6 +598,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
map = memcg_nid_shrinker_map(memcg, nid);
nr_max = min(shrinker_nr_max, map->nr_max);
} else if (rwsem_is_contended(&shrinker_rwsem)) {
+   freed = freed ? : 1;
break;
}
}
-- 
1.8.3.1



[Devel] [PATCH RHEL7 COMMIT] mm: Reduce access frequency to shrinker_rwsem during shrink_slab

2020-08-21 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.vz7.163.3
-->
commit adb31bddd253f8178635c1b1f61f4cbe7040a23f
Author: Valeriy Vdovin 
Date:   Fri Aug 21 12:25:23 2020 +0300

mm: Reduce access frequency to shrinker_rwsem during shrink_slab

Bug https://jira.sw.ru/browse/PSBM-99181 describes the following problem:
when the kernel holds open NFS delegations and the NFS server is not
accessible at the time the NFS shrinker is called, the whole shrinker
list execution gets stuck until the NFS server is back. Being a problem
in itself, it also introduces a bigger one: during that hang
shrinker_rwsem stays locked, so no new mounts can be done at that time,
because a new superblock tries to register its own shrinker and also
gets stuck acquiring shrinker_rwsem.

Commit 9e9e35d050955648449498827deb2d43be0564e1 is a workaround for that
problem. It is known that during a single shrinker execution we do not
actually need to hold shrinker_rwsem, so we release and reacquire the
rwsem for each shrinker in the list.

Because of this workaround the shrink_slab function now experiences a
major slowdown: shrinker_rwsem gets accessed twice for each shrinker in
the list. On an idle freshly booted system the shrinker_list could be
iterated up to 1600 times a second, although the original problem was
local to a single NFS shrinker.

This patch fixes commit 9e9e35d050955648449498827deb2d43be0564e1 so that
before calling up_read on shrinker_rwsem we check that this really is
the NFS shrinker, by looking at the NFS magic in the superblock when it
is reachable from the shrinker.

Changes:
  v2: Added missing 'rwsem_is_contended' check

https://jira.sw.ru/browse/PSBM-99181

Co-authored-by: Andrey Ryabinin 
Signed-off-by: Valeriy Vdovin 
Reviewed-by: Andrey Ryabinin 
---
 fs/super.c  |  2 +-
 mm/vmscan.c | 65 ++---
 2 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/fs/super.c b/fs/super.c
index f131d14..1cf377a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -80,7 +80,7 @@ EXPORT_SYMBOL(dcache_is_low);
  * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
  * take a passive reference to the superblock to avoid this from occurring.
  */
-static unsigned long super_cache_scan(struct shrinker *shrink,
+unsigned long super_cache_scan(struct shrinker *shrink,
  struct shrink_control *sc)
 {
struct super_block *sb;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d7082d2..4fa86e7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -453,6 +453,20 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
return freed;
 }
 
+unsigned long super_cache_scan(struct shrinker *shrink,
+ struct shrink_control *sc);
+
+static inline bool is_nfs_shrinker(struct shrinker *shrinker)
+{
+   struct super_block *sb = container_of(shrinker,
+   struct super_block, s_shrink);
+
+   if (shrinker->scan_objects == &super_cache_scan)
+   return sb->s_magic == NFS_SUPER_MAGIC;
+
+   return false;
+}
+
 struct shrinker *get_shrinker(struct shrinker *shrinker)
 {
/*
@@ -511,6 +525,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
.memcg = memcg,
};
struct shrinker *shrinker;
+   bool is_nfs;
 
shrinker = idr_find(&shrinker_idr, i);
if (unlikely(!shrinker)) {
@@ -518,6 +533,8 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
continue;
}
 
+   is_nfs = is_nfs_shrinker(shrinker);
+
/*
 * Take a refcnt on a shrinker so that it can't be freed or
 * removed from shrinker_idr (and shrinker_list). These way we
@@ -527,10 +544,16 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 * take too much time to finish (e.g. on nfs). And holding
 * global shrinker_rwsem can block registring and unregistring
 * of shrinkers.
+*
+* The up_read logic should only be executed for nfs shrinker
+* path, because it has proven to hang. For others it should be
+* skipped to reduce performance penalties.
 */
-   if(!get_shrinker(shrinker))
-   continue;
-   up_read(&shrinker_rwsem);
+   if (is_nfs) {
+   if (!get_shrinker(shrinker))
+   continue;
+   up_read(&shrinker_rwsem);
+   }
 
ret = do_s
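
For orientation, here is a condensed sketch of the per-shrinker locking
pattern that this change, together with the 'freed = freed ? : 1' fix above,
produces in shrink_slab_memcg(). It is assembled from the hunks shown here,
not the verbatim vz7 code; in particular the put_shrinker()/down_read()
placement in the NFS branch is an assumption from context:

    /* sketch: one iteration of the shrinker-map walk in shrink_slab_memcg() */
    shrinker = idr_find(&shrinker_idr, i);
    is_nfs = is_nfs_shrinker(shrinker);

    if (is_nfs) {
            /* Only the NFS shrinker is known to hang, so only it runs unlocked. */
            if (!get_shrinker(shrinker))
                    continue;
            up_read(&shrinker_rwsem);
    }

    ret = do_shrink_slab(&sc, shrinker, priority);
    /* ... SHRINK_EMPTY handling elided ... */

    if (is_nfs) {
            put_shrinker(shrinker);         /* assumed helper name */
            down_read(&shrinker_rwsem);
            /* the shrinker map may have grown while the rwsem was dropped */
            map = memcg_nid_shrinker_map(memcg, nid);
            nr_max = min(shrinker_nr_max, map->nr_max);
    } else if (rwsem_is_contended(&shrinker_rwsem)) {
            /* a writer (register/unregister) is waiting: back off, but
             * report at least one freed object so callers do not read
             * the early exit as "nothing left to shrink" */
            freed = freed ? : 1;
            break;
    }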

Re: [Devel] [PATCH RHEL v2] mm: Reduce access frequency to shrinker_rwsem during shrink_slab

2020-08-21 Thread Andrey Ryabinin



On 8/20/20 5:51 PM, Valeriy Vdovin wrote:
> Bug https://jira.sw.ru/browse/PSBM-99181 describes the following problem:
> when the kernel holds open NFS delegations and the NFS server is not
> accessible at the time the NFS shrinker is called, the whole shrinker
> list execution gets stuck until the NFS server is back. Being a problem
> in itself, it also introduces a bigger one: during that hang
> shrinker_rwsem stays locked, so no new mounts can be done at that time,
> because a new superblock tries to register its own shrinker and also
> gets stuck acquiring shrinker_rwsem.
> 
> Commit 9e9e35d050955648449498827deb2d43be0564e1 is a workaround for that
> problem. It is known that during a single shrinker execution we do not
> actually need to hold shrinker_rwsem, so we release and reacquire the
> rwsem for each shrinker in the list.
> 
> Because of this workaround the shrink_slab function now experiences a
> major slowdown: shrinker_rwsem gets accessed twice for each shrinker in
> the list. On an idle freshly booted system the shrinker_list could be
> iterated up to 1600 times a second, although the original problem was
> local to a single NFS shrinker.
> 
> This patch fixes commit 9e9e35d050955648449498827deb2d43be0564e1 so that
> before calling up_read on shrinker_rwsem we check that this really is
> the NFS shrinker, by looking at the NFS magic in the superblock when it
> is reachable from the shrinker.
> 
> https://jira.sw.ru/browse/PSBM-99181
> 
> Co-authored-by: Andrey Ryabinin 
> Signed-off-by: Valeriy Vdovin 
> 
> Changes:
>   v2: Added missing 'rwsem_is_contended' check
> ---

Reviewed-by: Andrey Ryabinin 


[Devel] [PATCH RHEL7 COMMIT] ploop: Enable 1M holes with zeroing indexes for io_kaio

2020-08-21 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.vz7.163.3
-->
commit 7207d56e25960c0d22162a35cdcd80a9bc57bfea
Author: Kirill Tkhai 
Date:   Fri Aug 21 11:06:46 2020 +0300

ploop: Enable 1M holes with zeroing indexes for io_kaio

This just enables the functionality for testing.
1M allocation is not implemented yet.

https://jira.sw.ru/browse/PSBM-105347
Signed-off-by: Kirill Tkhai 
---
 drivers/block/ploop/fmt_ploop1.c |  2 +-
 drivers/block/ploop/io_kaio.c| 14 --
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/block/ploop/fmt_ploop1.c b/drivers/block/ploop/fmt_ploop1.c
index 2180dfc..b5f9594 100644
--- a/drivers/block/ploop/fmt_ploop1.c
+++ b/drivers/block/ploop/fmt_ploop1.c
@@ -333,7 +333,7 @@ ploop1_open(struct ploop_delta * delta)
((u64)ph->bd_size + ph->l1_off) << 9)
delta->flags |= PLOOP_FMT_PREALLOCATED;
 
-   if (delta->io.ops->id != PLOOP_IO_DIRECT || kaio_backed_ext4)
+   if (delta->io.ops->id != PLOOP_IO_DIRECT && !kaio_backed_ext4)
set_bit(PLOOP_S_NO_FALLOC_DISCARD, &delta->plo->state);
 
return 0;
diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
index 020e79f..365e2e3 100644
--- a/drivers/block/ploop/io_kaio.c
+++ b/drivers/block/ploop/io_kaio.c
@@ -17,6 +17,7 @@
 #include 
 
 #include 
+#include 
 
 #define KAIO_PREALLOC (128 * 1024 * 1024) /* 128 MB */
 
@@ -183,8 +184,17 @@ static int kaio_kernel_submit(struct file *file, struct kaio_req *kreq,
 
if (rw & REQ_DISCARD) {
op = IOCB_CMD_UNMAP_ITER;
-   if (file_inode(file)->i_sb->s_magic == EXT4_SUPER_MAGIC)
-   return -ENOTSUPP;
+   if (file_inode(file)->i_sb->s_magic == EXT4_SUPER_MAGIC) {
+   err = file->f_op->fallocate(file,
+   FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE,
+   pos, count);
+   if (err == 0) {
+   kaio_complete_io_request(kreq->preq);
+   /* Otherwise, caller decrements counter */
+   }
+   return err;
+   }
+
} else if (rw & REQ_WRITE)
op = IOCB_CMD_WRITE_ITER;
else
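
For reference, the punch-hole semantics that the kaio path now relies on for
ext4-backed images can be exercised from userspace with the same flag
combination. A small stand-alone sketch (not part of the patch; the test
program and its arguments are hypothetical):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(int argc, char **argv)
    {
            /* usage: punch <file> <offset> <length> */
            if (argc != 4)
                    return 1;

            int fd = open(argv[1], O_RDWR);
            if (fd < 0) {
                    perror("open");
                    return 1;
            }

            /* Deallocate the byte range but keep the file size unchanged;
             * these are the same flags io_kaio passes to ->fallocate() above. */
            if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                          atoll(argv[2]), atoll(argv[3]))) {
                    perror("fallocate");
                    return 1;
            }
            return 0;
    }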


[Devel] [PATCH RHEL7 COMMIT] ms/vt: vt_ioctl: fix VT_DISALLOCATE freeing in-use virtual console

2020-08-21 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.vz7.163.3
-->
commit b427c08947e3e42a4ef9bf304219a82361e36afa
Author: Eric Biggers 
Date:   Fri Aug 21 11:06:37 2020 +0300

ms/vt: vt_ioctl: fix VT_DISALLOCATE freeing in-use virtual console

The VT_DISALLOCATE ioctl can free a virtual console while tty_release()
is still running, causing a use-after-free in con_shutdown().  This
occurs because VT_DISALLOCATE considers a virtual console's
'struct vc_data' to be unused as soon as the corresponding tty's
refcount hits 0.  But actually it may be still being closed.

Fix this by making vc_data be reference-counted via the embedded
'struct tty_port'.  A newly allocated virtual console has refcount 1.
Opening it for the first time increments the refcount to 2.  Closing it
for the last time decrements the refcount (in tty_operations::cleanup()
so that it happens late enough), as does VT_DISALLOCATE.

Reproducer:
#include <fcntl.h>
#include <linux/vt.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main()
{
if (fork()) {
for (;;)
close(open("/dev/tty5", O_RDWR));
} else {
int fd = open("/dev/tty10", O_RDWR);

for (;;)
ioctl(fd, VT_DISALLOCATE, 5);
}
}

KASAN report:
BUG: KASAN: use-after-free in con_shutdown+0x76/0x80 drivers/tty/vt/vt.c:3278
Write of size 8 at addr 88806a4ec108 by task syz_vt/129

CPU: 0 PID: 129 Comm: syz_vt Not tainted 5.6.0-rc2 #11
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20191223_100556-anatol 04/01/2014
Call Trace:
 [...]
 con_shutdown+0x76/0x80 drivers/tty/vt/vt.c:3278
 release_tty+0xa8/0x410 drivers/tty/tty_io.c:1514
 tty_release_struct+0x34/0x50 drivers/tty/tty_io.c:1629
 tty_release+0x984/0xed0 drivers/tty/tty_io.c:1789
 [...]

Allocated by task 129:
 [...]
 kzalloc include/linux/slab.h:669 [inline]
 vc_allocate drivers/tty/vt/vt.c:1085 [inline]
 vc_allocate+0x1ac/0x680 drivers/tty/vt/vt.c:1066
 con_install+0x4d/0x3f0 drivers/tty/vt/vt.c:3229
 tty_driver_install_tty drivers/tty/tty_io.c:1228 [inline]
 tty_init_dev+0x94/0x350 drivers/tty/tty_io.c:1341
 tty_open_by_driver drivers/tty/tty_io.c:1987 [inline]
 tty_open+0x3ca/0xb30 drivers/tty/tty_io.c:2035
 [...]

Freed by task 130:
 [...]
 kfree+0xbf/0x1e0 mm/slab.c:3757
 vt_disallocate drivers/tty/vt/vt_ioctl.c:300 [inline]
 vt_ioctl+0x16dc/0x1e30 drivers/tty/vt/vt_ioctl.c:818
 tty_ioctl+0x9db/0x11b0 drivers/tty/tty_io.c:2660
 [...]

Fixes: 4001d7b7fc27 ("vt: push down the tty lock so we can see what is left to tackle")
Cc:  # v3.4+
Reported-by: syzbot+522643ab5729b0421...@syzkaller.appspotmail.com
Acked-by: Jiri Slaby 
Signed-off-by: Eric Biggers 
Link: https://lore.kernel.org/r/20200322034305.210082-2-ebigg...@kernel.org
Signed-off-by: Greg Kroah-Hartman 

https://jira.sw.ru/browse/PSBM-106391
(cherry-picked from commit ca4463bf8438b403596edd0ec961ca0d4fbe0220)
Signed-off-by: Andrey Ryabinin 
---
 drivers/tty/vt/vt.c   | 23 ++-
 drivers/tty/vt/vt_ioctl.c | 12 
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 0ee0cd5..795d786 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -767,6 +767,17 @@ static void visual_deinit(struct vc_data *vc)
module_put(vc->vc_sw->owner);
 }
 
+static void vc_port_destruct(struct tty_port *port)
+{
+   struct vc_data *vc = container_of(port, struct vc_data, port);
+
+   kfree(vc);
+}
+
+static const struct tty_port_operations vc_port_ops = {
+   .destruct = vc_port_destruct,
+};
+
 int vc_allocate(unsigned int currcons) /* return 0 on success */
 {
struct vt_notifier_param param;
@@ -792,6 +803,7 @@ int vc_allocate(unsigned int currcons)  /* return 0 on success */
 
vc_cons[currcons].d = vc;
tty_port_init(&vc->port);
+   vc->port.ops = &vc_port_ops;
INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK);
 
visual_init(vc, currcons, 1);
@@ -2799,6 +2811,7 @@ static int con_install(struct tty_driver *driver, struct tty_struct *tty)
 
tty->driver_data = vc;
vc->port.tty = tty;
+   tty_port_get(&vc->port);
 
if (!tty->winsize.ws_row && !tty->winsize.ws_col) {
tty->winsize.ws_row = vc_cons[currcons].d->vc_rows;
@@ -2834,6 +2847,13 @@ static void con_shutdown(struct tty_struct *tty)
console_unlock();
 }

[Devel] [PATCH RHEL7 COMMIT] ploop: io_kaio: Introduce 4K discard_granularity with 1M clearing indexes when possible

2020-08-21 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.vz7.163.3
-->
commit b93800202805f72314dda1298beba8110bf07252
Author: Kirill Tkhai 
Date:   Fri Aug 21 11:06:11 2020 +0300

ploop: io_kaio: Introduce 4K discard_granularity with 1M clearing indexes when possible

1) In case the discard size is less than 1 cluster,
   a small hole is punched.
2) In case the discard request covers the whole cluster,
   the index in the BAT is also cleared.

Since small 4K holes require a 4K discard_granularity,
it becomes impossible to use the block level to make
discard requests 1-cluster aligned. This requires
us to split the requests manually, so the
force_split_discard_reqs parameter was introduced.

See comments in kaio_queue_settings() for details.
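
As an illustration of the manual split mentioned above (a sketch only, not
the ploop code; the first_sectors computation is an assumption based on the
boundary-crossing test visible in the dev.c hunk below):

    /* cluster_log = log2(cluster size in sectors), as in ploop_make_request() */
    sector_t start = bio->bi_sector;
    sector_t end   = start + (bio->bi_size >> 9) - 1;

    if ((start >> cluster_log) != (end >> cluster_log)) {
            /* sectors left until the next cluster boundary */
            sector_t first_sectors = (1 << cluster_log) -
                                     (start & ((1 << cluster_log) - 1));
            struct bio_pair *bp = bio_split(bio, first_sectors);

            /* bp->bio1 now lies within a single cluster; bp->bio2 is
             * resubmitted and may be split again at the next boundary */
    }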

https://jira.sw.ru/browse/PSBM-105347

Signed-off-by: Kirill Tkhai 
---
 drivers/block/ploop/dev.c | 12 
 drivers/block/ploop/io_kaio.c | 36 +++-
 include/linux/ploop/ploop.h   |  1 +
 3 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 5f1a0c2..25b516c 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -979,7 +979,8 @@ static void ploop_make_request(struct request_queue *q, struct bio *bio)
 * bio layer assumes that it can prepare single-page bio
 * not depending on any alignment constraints. So be it.
 */
-   if (!(bio->bi_rw & REQ_DISCARD) && bio->bi_size &&
+   if ((!(bio->bi_rw & REQ_DISCARD) || plo->force_split_discard_reqs) &&
+   bio->bi_size &&
(bio->bi_sector >> cluster_log) !=
((bio->bi_sector + (bio->bi_size >> 9) - 1) >> cluster_log)) {
struct bio_pair *bp;
@@ -988,7 +989,8 @@ static void ploop_make_request(struct request_queue *q, struct bio *bio)
 
plo->st.bio_splits++;
 
-   BUG_ON(bio->bi_vcnt != 1 || bio->bi_idx != 0);
+   if (!(bio->bi_rw & REQ_DISCARD))
+   BUG_ON(bio->bi_vcnt != 1 || bio->bi_idx != 0);
 
bp = bio_split(bio, first_sectors);
ploop_make_request(q, &bp->bio1);
@@ -2298,7 +2300,7 @@ static bool ploop_can_issue_discard(struct ploop_device *plo,
if (!list_is_singular(&plo->map.delta_list))
return false;
 
-   return whole_block(plo, preq);
+   return whole_block(plo, preq) || plo->force_split_discard_reqs;
 }
 
 static void
@@ -2568,7 +2570,8 @@ delta_io:
}
preq->iblock = iblk;
if (!(preq->req_rw & REQ_DISCARD) ||
-   (delta->ops->capability & 
PLOOP_FMT_CAP_IDENTICAL))
+   (delta->ops->capability & 
PLOOP_FMT_CAP_IDENTICAL) ||
+   !whole_block(plo, preq))
preq->eng_state = PLOOP_E_COMPLETE;
else
preq->eng_state = PLOOP_E_DATA_WBI;
@@ -4205,6 +4208,7 @@ static int ploop_start(struct ploop_device * plo, struct block_device *bdev)
blk_queue_merge_bvec(q, ploop_merge_bvec);
blk_queue_flush(q, REQ_FLUSH);
 
+   plo->force_split_discard_reqs = false;
top_delta->io.ops->queue_settings(&top_delta->io, q);
/* REQ_WRITE_SAME is not supported */
blk_queue_max_write_same_sectors(q, 0);
diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
index 1151b86..020e79f 100644
--- a/drivers/block/ploop/io_kaio.c
+++ b/drivers/block/ploop/io_kaio.c
@@ -1130,7 +1130,41 @@ static void kaio_queue_settings(struct ploop_io * io, struct request_queue * q)
if (inode->i_sb->s_magic == EXT4_SUPER_MAGIC) {
WARN_ON(!kaio_backed_ext4);
blk_queue_stack_limits(q, bdev_get_queue(io->files.bdev));
-   ploop_set_discard_limits(io->plo);
+   /*
+* There is no a way to force block engine to split a request
+* to fit a single cluster, when discard granuality is 4K
+* (inherited from fs block size in blk_queue_stack_limits()).
+* So, ploop_make_request() splits them.
+*/
+   io->plo->force_split_discard_reqs = true;
+   /*
+* Why not (1 << io->plo->cluster_log)?
+* Someone may want to clear indexes in case of a request
+* is big enough to fit the whole cluster.
+* In case of max_discard_sectors is 1 cluster, a request
+* for [cluster_start - 4K, cluster_start + cluster_size)
+* at block level will be splitted in two requests:
+*
+*

[Devel] [PATCH RHEL7 COMMIT] ms/netfilter: nf_tables: validate NFTA_SET_TABLE parameter

2020-08-21 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.vz7.163.3
-->
commit d3bef0951e541d534dc9f9e9107d37b1a9eaed6b
Author: Phil Turnbull 
Date:   Fri Aug 21 11:06:30 2020 +0300

ms/netfilter: nf_tables: validate NFTA_SET_TABLE parameter

If the NFTA_SET_TABLE parameter is missing and the NLM_F_DUMP flag is
not set, then a NULL pointer dereference is triggered in
nf_tables_set_lookup because ctx.table is NULL.

Signed-off-by: Phil Turnbull 
Signed-off-by: Pablo Neira Ayuso 

https://jira.sw.ru/browse/PSBM-106408
(cherry-picked from commit ca4463bf8438b403596edd0ec961ca0d4fbe0220)
Signed-off-by: Andrey Ryabinin 
---
 net/netfilter/nf_tables_api.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4692c36..14e030b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2680,6 +2680,8 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
/* Only accept unspec with dump */
if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
return -EAFNOSUPPORT;
+   if (!nla[NFTA_SET_TABLE])
+   return -EINVAL;
 
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
if (IS_ERR(set))


[Devel] [PATCH RHEL7 COMMIT] fixup ploop: Cleanup in ploop_make_request()

2020-08-21 Thread Vasily Averin
The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.vz7.163.3
-->
commit 7d2d185ee6b2dad71fba61a1f673b616c6d78104
Author: Kirill Tkhai 
Date:   Fri Aug 21 11:06:19 2020 +0300

fixup ploop: Cleanup in ploop_make_request()

BUG_ON(bio->bi_idx) is already checked at the start of this function,
so there is no reason to check it twice.

Signed-off-by: Kirill Tkhai 
VvS: to be merged with: "ploop: add ploop to the tree"
---
 drivers/block/ploop/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 25b516c..36c063b 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -990,7 +990,7 @@ static void ploop_make_request(struct request_queue *q, struct bio *bio)
plo->st.bio_splits++;
 
if (!(bio->bi_rw & REQ_DISCARD))
-   BUG_ON(bio->bi_vcnt != 1 || bio->bi_idx != 0);
+   BUG_ON(bio->bi_vcnt != 1);
 
bp = bio_split(bio, first_sectors);
ploop_make_request(q, &bp->bio1);


Re: [Devel] [PATCH RHEL v2] mm: Reduce access frequency to shrinker_rwsem during shrink_slab

2020-08-21 Thread Vasily Averin
Andrey,
I'm waiting for your approval here.

On 8/20/20 5:51 PM, Valeriy Vdovin wrote:
> Bug https://jira.sw.ru/browse/PSBM-99181 describes the following problem:
> when the kernel holds open NFS delegations and the NFS server is not
> accessible at the time the NFS shrinker is called, the whole shrinker
> list execution gets stuck until the NFS server is back. Being a problem
> in itself, it also introduces a bigger one: during that hang
> shrinker_rwsem stays locked, so no new mounts can be done at that time,
> because a new superblock tries to register its own shrinker and also
> gets stuck acquiring shrinker_rwsem.
> 
> Commit 9e9e35d050955648449498827deb2d43be0564e1 is a workaround for that
> problem. It is known that during a single shrinker execution we do not
> actually need to hold shrinker_rwsem, so we release and reacquire the
> rwsem for each shrinker in the list.
> 
> Because of this workaround the shrink_slab function now experiences a
> major slowdown: shrinker_rwsem gets accessed twice for each shrinker in
> the list. On an idle freshly booted system the shrinker_list could be
> iterated up to 1600 times a second, although the original problem was
> local to a single NFS shrinker.
> 
> This patch fixes commit 9e9e35d050955648449498827deb2d43be0564e1 so that
> before calling up_read on shrinker_rwsem we check that this really is
> the NFS shrinker, by looking at the NFS magic in the superblock when it
> is reachable from the shrinker.
> 
> https://jira.sw.ru/browse/PSBM-99181
> 
> Co-authored-by: Andrey Ryabinin 
> Signed-off-by: Valeriy Vdovin 
> 
> Changes:
>   v2: Added missing 'rwsem_is_contended' check
> ---
>  fs/super.c  |  2 +-
>  mm/vmscan.c | 65 ++---
>  2 files changed, 50 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/super.c b/fs/super.c
> index f131d14..1cf377a 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -80,7 +80,7 @@ EXPORT_SYMBOL(dcache_is_low);
>   * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
>   * take a passive reference to the superblock to avoid this from occurring.
>   */
> -static unsigned long super_cache_scan(struct shrinker *shrink,
> +unsigned long super_cache_scan(struct shrinker *shrink,
> struct shrink_control *sc)
>  {
>   struct super_block *sb;
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index d7082d2..4fa86e7 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -453,6 +453,20 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
>   return freed;
>  }
>  
> +unsigned long super_cache_scan(struct shrinker *shrink,
> +   struct shrink_control *sc);
> +
> +static inline bool is_nfs_shrinker(struct shrinker *shrinker)
> +{
> + struct super_block *sb = container_of(shrinker,
> + struct super_block, s_shrink);
> +
> + if (shrinker->scan_objects == &super_cache_scan)
> + return sb->s_magic == NFS_SUPER_MAGIC;
> +
> + return false;
> +}
> +
>  struct shrinker *get_shrinker(struct shrinker *shrinker)
>  {
>   /*
> @@ -511,6 +525,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
>   .memcg = memcg,
>   };
>   struct shrinker *shrinker;
> + bool is_nfs;
>  
>   shrinker = idr_find(&shrinker_idr, i);
>   if (unlikely(!shrinker)) {
> @@ -518,6 +533,8 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
>   continue;
>   }
>  
> + is_nfs = is_nfs_shrinker(shrinker);
> +
>   /*
>* Take a refcnt on a shrinker so that it can't be freed or
>* removed from shrinker_idr (and shrinker_list). These way we
> @@ -527,10 +544,16 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
>* take too much time to finish (e.g. on nfs). And holding
>* global shrinker_rwsem can block registring and unregistring
>* of shrinkers.
> +  *
> +  * The up_read logic should only be executed for nfs shrinker
> +  * path, because it has proven to hang. For others it should be
> +  * skipped to reduce performance penalties.
>*/
> - if(!get_shrinker(shrinker))
> - continue;
> - up_read(&shrinker_rwsem);
> + if (is_nfs) {
> + if (!get_shrinker(shrinker))
> + continue;
> + up_read(&shrinker_rwsem);
> + }
>  
>   ret = do_shrink_slab(&sc, shrinker, priority);
>   if (ret == SHRINK_EMPTY) {
> @@ -565,14 +588,18 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
>* memcg_expand_one_shrinker_map if new shrinkers
>* were registred in the meanw