Ploop thread may not reschedule for a long time and cause softlockup
warnings on a debug kernel:

 NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [ploop56082:13917]
...
 RIP: 0010:[<ffffffff8256f426>]  [<ffffffff8256f426>] 
_raw_spin_unlock_irqrestore+0x46/0xc0
...
 Call Trace:
  [<ffffffff816194f7>] __slab_free+0x207/0x2f0
  [<ffffffff81619ff1>] kmem_cache_free+0x2d1/0x380
  [<ffffffff81514412>] mempool_free_slab+0x22/0x30
  [<ffffffff815159ae>] mempool_free+0x14e/0x1f0
  [<ffffffff81747d6a>] bio_put+0x17a/0x270
  [<ffffffff81735837>] end_bio_bh_io_sync+0xc7/0x120
  [<ffffffff81747d6a>] bio_put+0x17a/0x270
  [<ffffffff81735837>] end_bio_bh_io_sync+0xc7/0x120
  [<ffffffff8174a5ac>] bio_endio+0x13c/0x220
  [<ffffffffa0858a38>] ploop_complete_request+0x148/0x1c60 [ploop]
  [<ffffffffa086b06d>] ploop_req_state_process+0x1f7d/0x3870 [ploop]
  [<ffffffffa086cfce>] ploop_thread+0x66e/0x1170 [ploop]
  [<ffffffff8121dcb6>] kthread+0x1e6/0x250
  [<ffffffff8258ae58>] ret_from_fork+0x58/0x90

Add cond_resched() on each iteration of the ploop_thread() to fix that.

https://jira.sw.ru/browse/PSBM-66617
Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
---
 drivers/block/ploop/dev.c     | 22 ++++++++++------------
 drivers/block/ploop/discard.c |  2 +-
 include/linux/ploop/ploop.h   |  3 +--
 3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index e7aec54172c0..eab6cb067708 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -230,8 +230,7 @@ static void ploop_test_and_clear_blockable(struct 
ploop_device *plo,
 }
 
 /* always called with plo->lock released */
-void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list,
-                     int keep_locked)
+void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list)
 {
        struct ploop_request * preq;
        int drop_qlen = 0;
@@ -261,8 +260,7 @@ void ploop_preq_drop(struct ploop_device * plo, struct 
list_head *drop_list,
 
        ploop_uncongest(plo);
 
-       if (!keep_locked)
-               spin_unlock_irq(&plo->lock);
+       spin_unlock_irq(&plo->lock);
 }
 
 static void merge_rw_flags_to_req(unsigned long rw,
@@ -1066,7 +1064,7 @@ out:
        blk_check_plugged(ploop_unplug, plo, sizeof(struct blk_plug_cb));
 
        if (!list_empty(&drop_list))
-               ploop_preq_drop(plo, &drop_list, 0);
+               ploop_preq_drop(plo, &drop_list);
 
        return;
 }
@@ -2964,12 +2962,12 @@ static int ploop_thread(void * data)
        set_user_nice(current, -20);
 
        blk_start_plug(&plug);
-       spin_lock_irq(&plo->lock);
        for (;;) {
                /* Convert bios to preqs early (at least before processing
                 * entry queue) to increase chances of bio merge
                 */
-       again:
+               cond_resched();
+               spin_lock_irq(&plo->lock);
                BUG_ON (!list_empty(&drop_list));
 
                process_pending_bios(plo, &drop_list);
@@ -2978,8 +2976,8 @@ static int ploop_thread(void * data)
 
                if (!list_empty(&drop_list)) {
                        spin_unlock_irq(&plo->lock);
-                       ploop_preq_drop(plo, &drop_list, 1);
-                       goto again;
+                       ploop_preq_drop(plo, &drop_list);
+                       continue;
                }
 
                if (!list_empty(&plo->ready_queue)) {
@@ -2991,7 +2989,6 @@ static int ploop_thread(void * data)
 
                        ploop_req_state_process(preq);
 
-                       spin_lock_irq(&plo->lock);
                        continue;
                }
 
@@ -3010,6 +3007,7 @@ static int ploop_thread(void * data)
                                set_bit(PLOOP_S_ATTENTION, &plo->state);
                                if (plo->active_reqs) {
                                        list_add(&preq->list, 
&plo->entry_queue);
+                                       spin_unlock_irq(&plo->lock);
                                        continue;
                                }
                                plo->barrier_reqs--;
@@ -3042,8 +3040,6 @@ static int ploop_thread(void * data)
                        spin_unlock_irq(&plo->lock);
 
                        ploop_req_state_process(preq);
-
-                       spin_lock_irq(&plo->lock);
                        continue;
                }
 
@@ -3058,8 +3054,10 @@ static int ploop_thread(void * data)
 
 wait_more:
                ploop_wait(plo, once, &plug);
+               spin_unlock_irq(&plo->lock);
                once = 0;
        }
+
        spin_unlock_irq(&plo->lock);
        blk_finish_plug(&plug);
 
diff --git a/drivers/block/ploop/discard.c b/drivers/block/ploop/discard.c
index 828ab3696469..33122497b9c5 100644
--- a/drivers/block/ploop/discard.c
+++ b/drivers/block/ploop/discard.c
@@ -66,7 +66,7 @@ int ploop_discard_fini_ioc(struct ploop_device *plo)
        spin_unlock_irq(&plo->lock);
 
        if (!list_empty(&drop_list))
-               ploop_preq_drop(plo, &drop_list, 0);
+               ploop_preq_drop(plo, &drop_list);
 
        if (plo->maintenance_type != PLOOP_MNTN_DISCARD) {
                ret = -EBUSY;
diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
index c7261c4bda57..f13b7caa4128 100644
--- a/include/linux/ploop/ploop.h
+++ b/include/linux/ploop/ploop.h
@@ -633,8 +633,7 @@ static inline struct ploop_delta * map_top_delta(struct 
ploop_map * map)
 
 void ploop_complete_io_state(struct ploop_request * preq);
 void ploop_fail_request(struct ploop_request * preq, int err);
-void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list,
-                     int keep_locked);
+void ploop_preq_drop(struct ploop_device * plo, struct list_head *drop_list);
 
 
 static inline int ploop_req_delay_fua_possible(struct ploop_request *preq)
-- 
2.13.0

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to