On 2018/07/18 19:28, Tetsuo Handa wrote:
> There are many reports of tasks stalling inside __getblk_gfp().

Currently, 18 of the 65 "INFO: task hung in" reports contain __getblk_gfp() in the backtrace:

  INFO: task hung in aead_recvmsg
  INFO: task hung in inode_sleep_on_writeback
  INFO: task hung in __writeback_inodes_sb_nr
  INFO: task hung in __blkdev_get (2)
  INFO: task hung in lookup_slow
  INFO: task hung in iterate_supers
  INFO: task hung in flush_work
  INFO: task hung in vfs_setxattr
  INFO: task hung in lock_mount
  INFO: task hung in __get_super
  INFO: task hung in do_unlinkat
  INFO: task hung in fat_fallocate
  INFO: task hung in generic_file_write_iter
  INFO: task hung in d_alloc_parallel
  INFO: task hung in __fdget_pos (2)
  INFO: task hung in path_openat
  INFO: task hung in do_truncate
  INFO: task hung in filename_create

> And there is a horrible comment for __getblk_gfp():
> 
>   /*
>    * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
>    * which corresponds to the passed block_device, block and size. The
>    * returned buffer has its reference count incremented.
>    *
>    * __getblk_gfp() will lock up the machine if grow_dev_page's
>    * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
>    */
> 
> In this report, the stall occurs after mount() has completed and the process
> has called remap_file_pages().
> I think that we might need to add a debug printk(), but I don't know what to
> examine.
> 

Andrew, can you pick up this debug printk() patch?
I guess we can get the result within one week.

>From 8f55e00b21fefffbc6abd9085ac503c52a302464 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <[email protected]>
Date: Fri, 20 Jul 2018 19:29:06 +0900
Subject: [PATCH] fs/buffer.c: add debug print for __getblk_gfp() stall problem

Among syzbot's unresolved hung task reports, 18 out of 65 reports contain a
__getblk_gfp() line in the backtrace. Since there is a comment block that
says that __getblk_gfp() will lock up the machine if the try_to_free_buffers()
attempt from grow_dev_page() is failing, let's start by checking whether
syzbot is hitting that case. This change will be removed after the bug is
fixed.

Signed-off-by: Tetsuo Handa <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
---
 fs/buffer.c           | 50 ++++++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/sched.h |  7 +++++++
 lib/Kconfig.debug     |  6 ++++++
 3 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index be31e28..ebf78ab 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -955,10 +955,20 @@ static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
                        end_block = init_page_buffers(page, bdev,
                                                (sector_t)index << sizebits,
                                                size);
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+                       current->getblk_executed |= 0x01;
+#endif
                        goto done;
                }
-               if (!try_to_free_buffers(page))
+               if (!try_to_free_buffers(page)) {
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+                       current->getblk_executed |= 0x02;
+#endif
                        goto failed;
+               }
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+               current->getblk_executed |= 0x04;
+#endif
        }
 
        /*
@@ -978,6 +988,9 @@ static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
        spin_unlock(&inode->i_mapping->private_lock);
 done:
        ret = (block < end_block) ? 1 : -ENXIO;
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+       current->getblk_executed |= 0x08;
+#endif
 failed:
        unlock_page(page);
        put_page(page);
@@ -1033,6 +1046,12 @@ static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
                return NULL;
        }
 
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+       current->getblk_stamp = jiffies;
+       current->getblk_executed = 0;
+       current->getblk_bh_count = 0;
+       current->getblk_bh_state = 0;
+#endif
        for (;;) {
                struct buffer_head *bh;
                int ret;
@@ -1044,6 +1063,18 @@ static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
                ret = grow_buffers(bdev, block, size, gfp);
                if (ret < 0)
                        return NULL;
+
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+               if (!time_after(jiffies, current->getblk_stamp + 3 * HZ))
+                       continue;
+               printk(KERN_ERR "%s(%u): getblk(): executed=%x bh_count=%d bh_state=%lx\n",
+                      current->comm, current->pid, current->getblk_executed,
+                      current->getblk_bh_count, current->getblk_bh_state);
+               current->getblk_executed = 0;
+               current->getblk_bh_count = 0;
+               current->getblk_bh_state = 0;
+               current->getblk_stamp = jiffies;
+#endif
        }
 }
 
@@ -3216,6 +3247,11 @@ int sync_dirty_buffer(struct buffer_head *bh)
  */
 static inline int buffer_busy(struct buffer_head *bh)
 {
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+       current->getblk_executed |= 0x80;
+       current->getblk_bh_count = atomic_read(&bh->b_count);
+       current->getblk_bh_state = bh->b_state;
+#endif
        return atomic_read(&bh->b_count) |
                (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
 }
@@ -3254,11 +3290,18 @@ int try_to_free_buffers(struct page *page)
        int ret = 0;
 
        BUG_ON(!PageLocked(page));
-       if (PageWriteback(page))
+       if (PageWriteback(page)) {
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+               current->getblk_executed |= 0x10;
+#endif
                return 0;
+       }
 
        if (mapping == NULL) {          /* can this still happen? */
                ret = drop_buffers(page, &buffers_to_free);
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+               current->getblk_executed |= 0x20;
+#endif
                goto out;
        }
 
@@ -3282,6 +3325,9 @@ int try_to_free_buffers(struct page *page)
        if (ret)
                cancel_dirty_page(page);
        spin_unlock(&mapping->private_lock);
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+       current->getblk_executed |= 0x40;
+#endif
 out:
        if (buffers_to_free) {
                struct buffer_head *bh = buffers_to_free;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c4e0aba..95b143e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1207,6 +1207,13 @@ struct task_struct {
        unsigned long                   prev_lowest_stack;
 #endif
 
+#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT
+       unsigned long                   getblk_stamp;
+       unsigned int                    getblk_executed;
+       unsigned int                    getblk_bh_count;
+       unsigned long                   getblk_bh_state;
+#endif
+
        /*
         * New fields for task_struct should be added above here, so that
         * they are included in the randomized portion of task_struct.
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c731ff9..0747ce7 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2058,3 +2058,9 @@ config IO_STRICT_DEVMEM
          if the driver using a given range cannot be disabled.
 
          If in doubt, say Y.
+
+config DEBUG_AID_FOR_SYZBOT
+       bool "Additional debug code for syzbot"
+       default n
+       help
+         This option is intended for testing by syzbot.
-- 
1.8.3.1

Reply via email to