On Tue, Mar 29, 2016 at 10:22:29PM +0800, Anand Jain wrote:
> Write and Flush errors are considered as critical errors,
> upon which the device will be brought offline and marked as
> failed. Write and Flush errors are identified using device
> error statistics.
> 
> Signed-off-by: Anand Jain <anand.j...@oracle.com>
> 
> btrfs: check for failed device and hot replace
> 
> This patch creates casualty_kthread to check for the failed
> devices, and triggers device replace.
> 
> Signed-off-by: Anand Jain <anand.j...@oracle.com>
> ---
>  fs/btrfs/ctree.h   |   2 +
>  fs/btrfs/disk-io.c | 161 
> ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  fs/btrfs/disk-io.h |   2 +
>  fs/btrfs/volumes.c |   1 +
>  fs/btrfs/volumes.h |   4 ++
>  5 files changed, 169 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 2c185a8e92f0..36f1c29e00a0 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -1569,6 +1569,7 @@ struct btrfs_fs_info {
>       struct mutex tree_log_mutex;
>       struct mutex transaction_kthread_mutex;
>       struct mutex cleaner_mutex;
> +     struct mutex casualty_mutex;
>       struct mutex chunk_mutex;
>       struct mutex volume_mutex;
>  
> @@ -1686,6 +1687,7 @@ struct btrfs_fs_info {
>       struct btrfs_workqueue *extent_workers;
>       struct task_struct *transaction_kthread;
>       struct task_struct *cleaner_kthread;
> +     struct task_struct *casualty_kthread;
>       int thread_pool_size;
>  
>       struct kobject *space_info_kobj;
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index b99329e37965..650e26e0acda 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -1869,6 +1869,153 @@ sleep:
>       return 0;
>  }
>  
> +static int btrfs_check_and_handle_casualty(void *arg)
> +{
> +     int ret;
> +     int found = 0;
> +     struct btrfs_device *device;
> +     struct btrfs_root *root = arg;
> +     struct btrfs_fs_info *fs_info = root->fs_info;
> +     struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
> +
> +     btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
> +     if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
> +             btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
> +             return -EBUSY;
> +     }
> +     btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
> +
> +     ret = btrfs_check_devices(fs_devices);
> +     if (ret == 1) {
> +             /*
> +              * There were some casualties, and if its beyond a
> +              * chunk group can tolerate, then FS will already
> +              * be in readonly, so check that. And that's best
> +              * btrfs could do as of now and no replace will help.
> +              */
> +             if (fs_info->sb->s_flags & MS_RDONLY)
> +                     return -EROFS;
> +
> +             mutex_lock(&fs_devices->device_list_mutex);
> +             rcu_read_lock();
> +             list_for_each_entry_rcu(device,
> +                             &fs_devices->devices, dev_list) {
> +                     if (device->failed) {
> +                             found = 1;
> +                             break;
> +                     }
> +             }
> +             rcu_read_unlock();
> +             mutex_unlock(&fs_devices->device_list_mutex);
> +     }
> +
> +     /*
> +      * We are using the replace code which should be interrupt-able
> +      * during unmount, and as of now there is no user land stop
> +      * request that we support and this will run until its complete
> +      */
> +     if (found)
> +             ret = btrfs_auto_replace_start(root, device);
> +
> +     return ret;
> +}
> +
> +/*
> + * A kthread to check if any auto maintenance be required. This is
> + * multithread safe, and kthread is running only if
> + * fs_info->casualty_kthread is not NULL, fixme: atomic ?
> + */
> +static int casualty_kthread(void *arg)
> +{
> +     int ret;
> +     int again;
> +     struct btrfs_root *root = arg;
> +
> +     do {
> +             again = 0;
> +
> +             if (btrfs_need_cleaner_sleep(root))
> +                     goto sleep;
> +
> +             if (!mutex_trylock(&root->fs_info->casualty_mutex))
> +                     goto sleep;
> +
> +             if (btrfs_need_cleaner_sleep(root)) {
> +                     mutex_unlock(&root->fs_info->casualty_mutex);
> +                     goto sleep;
> +             }
> +
> +             ret = btrfs_check_and_handle_casualty(arg);
> +             if (ret == -EROFS) {
> +                     /*
> +                      * When checking and fixing the devices, the
> +                      * FS may be marked as RO in some situations.
> +                      * And on ROFS casualty thread has no work.
> +                      * So optimize here, to stop this thread until
> +                      * FS is back to RW.
> +                      */
> +             }
> +             mutex_unlock(&root->fs_info->casualty_mutex);
> +
> +sleep:
> +             if (!try_to_freeze() && !again) {

This block was copy-pasted from cleaner_kthread(). The 'again' variable is
effectively unused here (it is only ever set to 0), and the use of
try_to_freeze() in cleaner_kthread() was removed in Mason's 'for-linus-4.6'
branch by commit 838fe188 'btrfs: cleaner_kthread() doesn't need explicit
freeze'. casualty_kthread() isn't marked as freezable either, so this check
can be removed entirely.


> +                     set_current_state(TASK_INTERRUPTIBLE);
> +                     if (!kthread_should_stop())
> +                             schedule();
> +                     __set_current_state(TASK_RUNNING);
> +             }
> +     } while (!kthread_should_stop());
> +
> +     return 0;
> +}
> +

-- 
Yauhen Kharuzhy
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to