[Devel] [PATCH rh7] ploop: fix freeze/thaw ioctls

2016-07-15 Thread Maxim Patlasov
Current implementation suffers from several problems:

1) If someone, e.g. another instance of push-backup tool, mistakenly
attempts to freeze ploop while its thawing is in progress, we can
end up in double freeze.
2) After initiating thawing, no way to find out it by sysctl or /sys.
3) Handling PLOOP_S_FROZEN bit is not synchronized with ploop STOP/CLEAR
ioctls. It's not nice if ploop releases bdev keeping it in frozen state.

The patch fixes the above in straightforward way: more descriptive
plo->freeze_state, visible via /sys/block/ploopN/pstate/freeze_state, and
special checks in ioctl-s to ensure that freeze/thaw is allowed only on
running ploops and that thaw must preceed ploop STOP.

https://jira.sw.ru/browse/PSBM-49699

Signed-off-by: Maxim Patlasov 
---
 drivers/block/ploop/dev.c   |   34 ++
 drivers/block/ploop/sysfs.c |6 ++
 include/linux/ploop/ploop.h |8 +++-
 3 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 3dc94ca..81d463f 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -3905,6 +3905,13 @@ static int ploop_stop(struct ploop_device * plo, struct 
block_device *bdev)
return -EBUSY;
}
 
+   if (plo->freeze_state != PLOOP_F_NORMAL) {
+   if (printk_ratelimit())
+   printk(KERN_INFO "stop ploop%d failed 
(freeze_state=%d)\n",
+  plo->index, plo->freeze_state);
+   return -EBUSY;
+   }
+
clear_bit(PLOOP_S_PUSH_BACKUP, &plo->state);
ploop_pb_stop(plo->pbd, true);
 
@@ -4819,15 +4826,21 @@ static int ploop_freeze(struct ploop_device *plo, 
struct block_device *bdev)
 {
struct super_block *sb = plo->sb;
 
-   if (test_bit(PLOOP_S_FROZEN, &plo->state))
+   if (!test_bit(PLOOP_S_RUNNING, &plo->state))
+   return -EINVAL;
+
+   if (plo->freeze_state == PLOOP_F_FROZEN)
return 0;
 
+   if (plo->freeze_state == PLOOP_F_THAWING)
+   return -EBUSY;
+
sb = freeze_bdev(bdev);
if (sb && IS_ERR(sb))
return PTR_ERR(sb);
 
plo->sb = sb;
-   set_bit(PLOOP_S_FROZEN, &plo->state);
+   plo->freeze_state = PLOOP_F_FROZEN;
return 0;
 }
 
@@ -4836,16 +4849,29 @@ static int ploop_thaw(struct ploop_device *plo, struct 
block_device *bdev)
struct super_block *sb = plo->sb;
int err;
 
-   if (!test_bit(PLOOP_S_FROZEN, &plo->state))
+   if (!test_bit(PLOOP_S_RUNNING, &plo->state))
+   return -EINVAL;
+
+   if (plo->freeze_state == PLOOP_F_NORMAL)
return 0;
 
+   if (plo->freeze_state == PLOOP_F_THAWING)
+   return -EBUSY;
+
plo->sb = NULL;
-   clear_bit(PLOOP_S_FROZEN, &plo->state);
+   plo->freeze_state = PLOOP_F_THAWING;
 
mutex_unlock(&plo->ctl_mutex);
err = thaw_bdev(bdev, sb);
mutex_lock(&plo->ctl_mutex);
 
+   BUG_ON(plo->freeze_state != PLOOP_F_THAWING);
+
+   if (!err)
+   plo->freeze_state = PLOOP_F_NORMAL;
+   else
+   plo->freeze_state = PLOOP_F_FROZEN;
+
return err;
 }
 
diff --git a/drivers/block/ploop/sysfs.c b/drivers/block/ploop/sysfs.c
index d6dcc83..71b2a20 100644
--- a/drivers/block/ploop/sysfs.c
+++ b/drivers/block/ploop/sysfs.c
@@ -425,6 +425,11 @@ static ssize_t print_push_backup_uuid(struct ploop_device 
* plo, char * page)
return snprintf(page, PAGE_SIZE, "%pUB\n", uuid);
 }
 
+static u32 show_freeze_state(struct ploop_device * plo)
+{
+   return plo->freeze_state;
+}
+
 #define _TUNE_U32(_name)   \
 static u32 show_##_name(struct ploop_device * plo) \
 {  \
@@ -507,6 +512,7 @@ static struct attribute *state_attributes[] = {
_A3(cookie),
_A3(push_backup_uuid),
_A(open_count),
+   _A(freeze_state),
NULL
 };
 
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index 7864edf..8ab4477 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -61,7 +61,12 @@ enum {
   (for minor mgmt only) */
PLOOP_S_ONCE,   /* An event (e.g. printk once) happened */
PLOOP_S_PUSH_BACKUP,/* Push_backup is in progress */
-   PLOOP_S_FROZEN  /* Frozen PLOOP_IOC_FREEZE */
+};
+
+enum {
+   PLOOP_F_NORMAL, /* Default: not yet freezed or unfrozen */
+   PLOOP_F_FROZEN, /* Frozen PLOOP_IOC_FREEZE */
+   PLOOP_F_THAWING,/* thaw_bdev is in progress */
 };
 
 struct ploop_snapdata
@@ -411,6 +416,7 @@ struct ploop_device
struct request_queue*queue;
struct task_struct  *thread;
struct super_block  *sb;
+   int freeze_state;
struct rb_node  link;
 
 

[Devel] [PATCH] ext4: improve ext4lazyinit scalability

2016-07-15 Thread Dmitry Monakhov
ext4lazyinit is global thread. This thread performs itable initalization under

It basically does followes:
ext4_lazyinit_thread
  ->mutex_lock(&eli->li_list_mtx);
  ->ext4_run_li_request(elr)
->ext4_init_inode_table-> Do a lot of IO if list is large

And when new mounts/umount arrives they have to block on ->li_list_mtx
because  lazy_thread holds it during full walk procedure.
ext4_fill_super
 ->ext4_register_li_request
   ->mutex_lock(&ext4_li_info->li_list_mtx);
   ->list_add(&elr->lr_request, &ext4_li_info >li_request_list);
In my case mount takes 40minutes on server with 36 * 4Tb HDD.
Convinient user may face this in case of very slow dev ( /dev/mmcblkXXX)
Even more. I one of filesystem was frozen lazyinit_thread will simply blocks
on sb_start_write() so other mount/umounts will suck forever.

This patch changes logic like follows:
- grap ->s_umount read sem before process new li_request after that it is safe
  to drop list_mtx because all callers of li_remove_requers are holds ->s_umount
  for write.
- li_thread skip frozen SB's

Locking:
Locking order is asserted by umout path like follows: s_umount ->li_list_mtx
so the only way to to grab ->s_mount inside li_thread is via down_read_trylock

https://jira.sw.ru/browse/PSBM-49658

Signed-off-by: Dmitry Monakhov 
---
 fs/ext4/super.c | 53 -
 1 file changed, 36 insertions(+), 17 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3822a5a..0ee193f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2635,7 +2635,6 @@ static int ext4_run_li_request(struct ext4_li_request 
*elr)
sb = elr->lr_super;
ngroups = EXT4_SB(sb)->s_groups_count;
 
-   sb_start_write(sb);
for (group = elr->lr_next_group; group < ngroups; group++) {
gdp = ext4_get_group_desc(sb, group, NULL);
if (!gdp) {
@@ -2662,8 +2661,6 @@ static int ext4_run_li_request(struct ext4_li_request 
*elr)
elr->lr_next_sched = jiffies + elr->lr_timeout;
elr->lr_next_group = group + 1;
}
-   sb_end_write(sb);
-
return ret;
 }
 
@@ -2713,9 +2710,9 @@ static struct task_struct *ext4_lazyinit_task;
 static int ext4_lazyinit_thread(void *arg)
 {
struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
-   struct list_head *pos, *n;
struct ext4_li_request *elr;
unsigned long next_wakeup, cur;
+   LIST_HEAD(request_list);
 
BUG_ON(NULL == eli);
 
@@ -2728,21 +2725,43 @@ cont_thread:
mutex_unlock(&eli->li_list_mtx);
goto exit_thread;
}
-
-   list_for_each_safe(pos, n, &eli->li_request_list) {
-   elr = list_entry(pos, struct ext4_li_request,
-lr_request);
-
-   if (time_after_eq(jiffies, elr->lr_next_sched)) {
-   if (ext4_run_li_request(elr) != 0) {
-   /* error, remove the lazy_init job */
-   ext4_remove_li_request(elr);
-   continue;
+   list_splice_init(&eli->li_request_list, &request_list);
+   while (!list_empty(&request_list)) {
+   int err = 0;
+   int progress = 0;
+
+   elr = list_entry(request_list.next,
+struct ext4_li_request, lr_request);
+   list_move(request_list.next, &eli->li_request_list);
+   if (time_before(jiffies, elr->lr_next_sched)) {
+   if (time_before(elr->lr_next_sched, 
next_wakeup))
+   next_wakeup = elr->lr_next_sched;
+   continue;
+   }
+   if (down_read_trylock(&elr->lr_super->s_umount)) {
+   if (sb_start_write_trylock(elr->lr_super)) {
+   progress = 1;
+   /* We holds sb->s_umount, sb can not
+* be removed from the list, it is
+* now safe to drop li_list_mtx
+*/
+   mutex_unlock(&eli->li_list_mtx);
+   err = ext4_run_li_request(elr);
+   sb_end_write(elr->lr_super);
+   mutex_lock(&eli->li_list_mtx);
}
+   up_read((&elr->lr_super->s_umount));
+   }
+   /* error, remove the lazy_init job */
+   if (err) {
+   ext4_remove_li_request(elr);
+   cont

Re: [Devel] [PATCH rh7] ploop: release plo->ctl_mutex for thaw_bdev in PLOOP_IOC_THAW handler

2016-07-15 Thread Maxim Patlasov

On 07/15/2016 01:10 AM, Vladimir Davydov wrote:

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index d52975eaaa36..3dc94ca5c393 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -4839,11 +4839,12 @@ static int ploop_thaw(struct ploop_device *plo, struct 
block_device *bdev)
if (!test_bit(PLOOP_S_FROZEN, &plo->state))
return 0;
  
+	plo->sb = NULL;

+   clear_bit(PLOOP_S_FROZEN, &plo->state);
+
+   mutex_unlock(&plo->ctl_mutex);


Since this point nothing in ploop state hints that the device is still 
frozen. Someone (another instance of backup tool?) may mistakenly try to 
freeze it again (before we call thaw_bdev) and succeed in it (because 
the S_FROZEN bit is already cleared). The result would be "double 
freeze" that we tried to avoid by initial patch. The fix must be simple, 
I'll send a patch soon.


Thanks,
Maxim


err = thaw_bdev(bdev, sb);
-   if (!err) {
-   plo->sb = NULL;
-   clear_bit(PLOOP_S_FROZEN, &plo->state);
-   }
+   mutex_lock(&plo->ctl_mutex);
  
  	return err;

  }


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [NEW KERNEL] 3.10.0-327.18.2.vz7.14.26 (rhel7)

2016-07-15 Thread builder
Changelog:

OpenVZ kernel rh7-3.10.0-327.18.2.vz7.14.26

* fix ploop backup deadlock


Generated changelog:

* Fri Jul 15 2016 Vladimir Davydov  
[3.10.0-327.18.2.vz7.14.26]
- ploop: release plo->ctl_mutex for thaw_bdev in PLOOP_IOC_THAW handler 
(Vladimir Davydov) [PSBM-49699]


Built packages: 
http://kojistorage.eng.sw.ru/packages/vzkernel/3.10.0/327.18.2.vz7.14.26/
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH RHEL7 COMMIT] ploop: release plo->ctl_mutex for thaw_bdev in PLOOP_IOC_THAW handler

2016-07-15 Thread Vladimir Davydov
The commit is pushed to "branch-rh7-3.10.0-327.18.2.vz7.14.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.18.2.vz7.14.25
-->
commit b81bf870a6c314b664977ee9b6747cf93e78bcc3
Author: Vladimir Davydov 
Date:   Fri Jul 15 13:34:36 2016 +0400

ploop: release plo->ctl_mutex for thaw_bdev in PLOOP_IOC_THAW handler

Recent patch to ploop 91a74e3b91a ("ploop: add PLOOP_IOC_FREEZE and
PLOOP_IOC_THAW ioctls") introduced the follwing deadlock:

Thread 1:
[] sleep_on_buffer+0xe/0x20
[] __sync_dirty_buffer+0xb8/0xe0
[] sync_dirty_buffer+0x13/0x20
[] ext4_commit_super+0x1b0/0x240 [ext4]
[] ext4_unfreeze+0x2d/0x40 [ext4]
[] thaw_super+0x3f/0xb0
[] thaw_bdev+0x65/0x80
[] ploop_ioctl+0x6d0/0x29f0 [ploop]
[] blkdev_ioctl+0x2df/0x770
[] block_ioctl+0x41/0x50
[] do_vfs_ioctl+0x255/0x4f0
[] SyS_ioctl+0x54/0xa0
[] system_call_fastpath+0x16/0x1b
[] 0x

Thread 2:
[] ploop_pb_get_pending+0x163/0x290 [ploop]
[] ploop_push_backup_io_get.isra.26+0x81/0x1b0 [ploop]
[] ploop_push_backup_io+0x15b/0x260 [ploop]
[] ploop_ioctl+0xe96/0x29f0 [ploop]
[] blkdev_ioctl+0x2df/0x770
[] block_ioctl+0x41/0x50
[] do_vfs_ioctl+0x255/0x4f0
[] SyS_ioctl+0x54/0xa0
[] system_call_fastpath+0x16/0x1b
[] 0x

E.g. thread 1 is thawing ploop with PLOOP_IOC_THAW ioctl which holds
plo->ctl_mutex during its work. To thaw itself, ext4 has to commit some
data. This commit triggers push backup out-of-order request which must
be processed and acked by userspace to be completed. But userspace can't
process it, because ploop_pb_get_pending() wants the same mutext. Thus,
deadlock.

Fix the deadlock by releasing the mutex before calling thaw_bdev and
reaquiring it after thaw_bdev is done.

https://jira.sw.ru/browse/PSBM-49699

Reported-by: Pavel Borzenkov 
Signed-off-by: Vladimir Davydov 
Cc: Maxim Patlasov 
---
 drivers/block/ploop/dev.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index d52975eaaa36..3dc94ca5c393 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -4839,11 +4839,12 @@ static int ploop_thaw(struct ploop_device *plo, struct 
block_device *bdev)
if (!test_bit(PLOOP_S_FROZEN, &plo->state))
return 0;
 
+   plo->sb = NULL;
+   clear_bit(PLOOP_S_FROZEN, &plo->state);
+
+   mutex_unlock(&plo->ctl_mutex);
err = thaw_bdev(bdev, sb);
-   if (!err) {
-   plo->sb = NULL;
-   clear_bit(PLOOP_S_FROZEN, &plo->state);
-   }
+   mutex_lock(&plo->ctl_mutex);
 
return err;
 }
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH rh7] ploop: release plo->ctl_mutex for thaw_bdev in PLOOP_IOC_THAW handler

2016-07-15 Thread Vladimir Davydov
Recent patch to ploop 91a74e3b91a ("ploop: add PLOOP_IOC_FREEZE and
PLOOP_IOC_THAW ioctls") introduced the follwing deadlock:

Thread 1:
[] sleep_on_buffer+0xe/0x20
[] __sync_dirty_buffer+0xb8/0xe0
[] sync_dirty_buffer+0x13/0x20
[] ext4_commit_super+0x1b0/0x240 [ext4]
[] ext4_unfreeze+0x2d/0x40 [ext4]
[] thaw_super+0x3f/0xb0
[] thaw_bdev+0x65/0x80
[] ploop_ioctl+0x6d0/0x29f0 [ploop]
[] blkdev_ioctl+0x2df/0x770
[] block_ioctl+0x41/0x50
[] do_vfs_ioctl+0x255/0x4f0
[] SyS_ioctl+0x54/0xa0
[] system_call_fastpath+0x16/0x1b
[] 0x

Thread 2:
[] ploop_pb_get_pending+0x163/0x290 [ploop]
[] ploop_push_backup_io_get.isra.26+0x81/0x1b0 [ploop]
[] ploop_push_backup_io+0x15b/0x260 [ploop]
[] ploop_ioctl+0xe96/0x29f0 [ploop]
[] blkdev_ioctl+0x2df/0x770
[] block_ioctl+0x41/0x50
[] do_vfs_ioctl+0x255/0x4f0
[] SyS_ioctl+0x54/0xa0
[] system_call_fastpath+0x16/0x1b
[] 0x

E.g. thread 1 is thawing ploop with PLOOP_IOC_THAW ioctl which holds
plo->ctl_mutex during its work. To thaw itself, ext4 has to commit some
data. This commit triggers push backup out-of-order request which must
be processed and acked by userspace to be completed. But userspace can't
process it, because ploop_pb_get_pending() wants the same mutext. Thus,
deadlock.

Fix the deadlock by releasing the mutex before calling thaw_bdev and
reaquiring it after thaw_bdev is done.

https://jira.sw.ru/browse/PSBM-49699

Reported-by: Pavel Borzenkov 
Signed-off-by: Vladimir Davydov 
Cc: Maxim Patlasov 
---
 drivers/block/ploop/dev.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index d52975eaaa36..3dc94ca5c393 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -4839,11 +4839,12 @@ static int ploop_thaw(struct ploop_device *plo, struct 
block_device *bdev)
if (!test_bit(PLOOP_S_FROZEN, &plo->state))
return 0;
 
+   plo->sb = NULL;
+   clear_bit(PLOOP_S_FROZEN, &plo->state);
+
+   mutex_unlock(&plo->ctl_mutex);
err = thaw_bdev(bdev, sb);
-   if (!err) {
-   plo->sb = NULL;
-   clear_bit(PLOOP_S_FROZEN, &plo->state);
-   }
+   mutex_lock(&plo->ctl_mutex);
 
return err;
 }
-- 
2.1.4

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel