Re: [PATCH] mpt2sas: setpci reset kernel panic fix
On Wed, Jun 17, 2015 at 11:37:53AM +0530, Nagarajkumar Narayanan wrote: Problem Description: https://bugzilla.kernel.org/show_bug.cgi?id=95101 Due to lack of synchronization between ioctl, BRM status access, pci resource removal kernel oops happen as ioctl path and BRM status access path still tries to access the removed resources kernel: BUG: unable to handle kernel paging request at c900171e Oops: [#1] SMP Patch Description: Two locks added to provide syncrhonization 1. pci_access_mutex: Mutex to synchronize ioctl,sysfs show path and pci resource handling. PCI resource freeing will lead to free vital hardware/memory resource, which might be in use by cli/sysfs path functions resulting in Null pointer reference followed by kernel crash. To avoid the above race condition we use mutex syncrhonization which ensures the syncrhonization between cli/sysfs_show path 2. spinlock on list operations over IOCs Case: when multiple warpdrive cards(IOCs) are in use Each IOC will added to the ioc list stucture on initialization. Watchdog threads run at regular intervals to check IOC for any fault conditions which will trigger the dead_ioc thread to deallocate pci resource, resulting deleting the IOC netry from list, this deletion need to protected by spinlock to enusre that ioc removal is syncrhonized, if not synchronized it might lead to list_del corruption as the ioc list is traversed in cli path please find the patch to apply on scsi/mpt2sas driver http://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ From ba692140278e6e2b660896c32206b26dac98d215 Mon Sep 17 00:00:00 2001 From: Nagarajkumar Narayanan nagarajkumar.naraya...@seagate.com Date: Thu, 19 Mar 2015 12:02:07 +0530 Subject: [PATCH] mpt2sas setpci kernel oops fix Signed-off-by: Nagarajkumar Narayanan nagarajkumar.naraya...@seagate.com --- drivers/scsi/mpt2sas/mpt2sas_base.c | 10 +++ drivers/scsi/mpt2sas/mpt2sas_base.h | 20 +- drivers/scsi/mpt2sas/mpt2sas_ctl.c | 48 + drivers/scsi/mpt2sas/mpt2sas_scsih.c | 32 ++- 4 files changed, 102 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index 11248de..d2a498c 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -108,13 +108,18 @@ _scsih_set_fwfault_debug(const char *val, struct kernel_param *kp) { int ret = param_set_int(val, kp); struct MPT2SAS_ADAPTER *ioc; + unsigned long flags; if (ret) return ret; + /* global ioc spinlock to protect controller list on list operations */ + mpt2sas_initialize_gioc_lock(); printk(KERN_INFO setting fwfault_debug(%d)\n, mpt2sas_fwfault_debug); + spin_lock_irqsave(gioc_lock, flags); list_for_each_entry(ioc, mpt2sas_ioc_list, list) ioc-fwfault_debug = mpt2sas_fwfault_debug; + spin_unlock_irqrestore(gioc_lock, flags); return 0; } @@ -4436,6 +4441,9 @@ mpt2sas_base_free_resources(struct MPT2SAS_ADAPTER *ioc) __func__)); if (ioc-chip_phys ioc-chip) { + /* synchronizing freeing resource with pci_access_mutex lock */ + if (ioc-is_warpdrive) + mutex_lock(ioc-pci_access_mutex); _base_mask_interrupts(ioc); ioc-shost_recovery = 1; _base_make_ioc_ready(ioc, CAN_SLEEP, SOFT_RESET); @@ -4454,6 +4462,8 @@ mpt2sas_base_free_resources(struct MPT2SAS_ADAPTER *ioc) pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); } + if (ioc-is_warpdrive) + mutex_unlock(ioc-pci_access_mutex); return; } diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index caff8d1..a0d26f0 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -799,6 +799,12 @@ typedef void (*MPT2SAS_FLUSH_RUNNING_CMDS)(struct MPT2SAS_ADAPTER *ioc); * @delayed_tr_list: target reset link list * @delayed_tr_volume_list: volume target reset link list * @@temp_sensors_count: flag to carry the number of temperature sensors + * @pci_access_mutex: Mutex to synchronize ioctl,sysfs show path and + * pci resource handling. PCI resource freeing will lead to free + * vital hardware/memory resource, which might be in use by cli/sysfs + * path functions resulting in Null pointer reference followed by kernel + * crash. To avoid the above race condition we use mutex syncrhonization + * which ensures the syncrhonization between cli/sysfs_show path */ struct MPT2SAS_ADAPTER { struct list_head list; @@ -1015,6 +1021,7 @@ struct MPT2SAS_ADAPTER { u8 mfg_pg10_hide_flag; u8 hide_drives; + struct mutex pci_access_mutex; }; typedef u8 (*MPT_CALLBACK)(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index, @@ -1023,6 +1030,17 @@ typedef u8 (*MPT_CALLBACK)(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index, /* base shared API */ extern struct list_head
[PATCH] mpt2sas: setpci reset kernel panic fix
Problem Description: https://bugzilla.kernel.org/show_bug.cgi?id=95101 Due to lack of synchronization between ioctl, BRM status access, pci resource removal kernel oops happen as ioctl path and BRM status access path still tries to access the removed resources kernel: BUG: unable to handle kernel paging request at c900171e Oops: [#1] SMP Patch Description: Two locks added to provide syncrhonization 1. pci_access_mutex: Mutex to synchronize ioctl,sysfs show path and pci resource handling. PCI resource freeing will lead to free vital hardware/memory resource, which might be in use by cli/sysfs path functions resulting in Null pointer reference followed by kernel crash. To avoid the above race condition we use mutex syncrhonization which ensures the syncrhonization between cli/sysfs_show path 2. spinlock on list operations over IOCs Case: when multiple warpdrive cards(IOCs) are in use Each IOC will added to the ioc list stucture on initialization. Watchdog threads run at regular intervals to check IOC for any fault conditions which will trigger the dead_ioc thread to deallocate pci resource, resulting deleting the IOC netry from list, this deletion need to protected by spinlock to enusre that ioc removal is syncrhonized, if not synchronized it might lead to list_del corruption as the ioc list is traversed in cli path please find the patch to apply on scsi/mpt2sas driver http://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/ From ba692140278e6e2b660896c32206b26dac98d215 Mon Sep 17 00:00:00 2001 From: Nagarajkumar Narayanan nagarajkumar.naraya...@seagate.com Date: Thu, 19 Mar 2015 12:02:07 +0530 Subject: [PATCH] mpt2sas setpci kernel oops fix Signed-off-by: Nagarajkumar Narayanan nagarajkumar.naraya...@seagate.com --- drivers/scsi/mpt2sas/mpt2sas_base.c | 10 +++ drivers/scsi/mpt2sas/mpt2sas_base.h | 20 +- drivers/scsi/mpt2sas/mpt2sas_ctl.c | 48 + drivers/scsi/mpt2sas/mpt2sas_scsih.c | 32 ++- 4 files changed, 102 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index 11248de..d2a498c 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -108,13 +108,18 @@ _scsih_set_fwfault_debug(const char *val, struct kernel_param *kp) { int ret = param_set_int(val, kp); struct MPT2SAS_ADAPTER *ioc; + unsigned long flags; if (ret) return ret; + /* global ioc spinlock to protect controller list on list operations */ + mpt2sas_initialize_gioc_lock(); printk(KERN_INFO setting fwfault_debug(%d)\n, mpt2sas_fwfault_debug); + spin_lock_irqsave(gioc_lock, flags); list_for_each_entry(ioc, mpt2sas_ioc_list, list) ioc-fwfault_debug = mpt2sas_fwfault_debug; + spin_unlock_irqrestore(gioc_lock, flags); return 0; } @@ -4436,6 +4441,9 @@ mpt2sas_base_free_resources(struct MPT2SAS_ADAPTER *ioc) __func__)); if (ioc-chip_phys ioc-chip) { + /* synchronizing freeing resource with pci_access_mutex lock */ + if (ioc-is_warpdrive) + mutex_lock(ioc-pci_access_mutex); _base_mask_interrupts(ioc); ioc-shost_recovery = 1; _base_make_ioc_ready(ioc, CAN_SLEEP, SOFT_RESET); @@ -4454,6 +4462,8 @@ mpt2sas_base_free_resources(struct MPT2SAS_ADAPTER *ioc) pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); } + if (ioc-is_warpdrive) + mutex_unlock(ioc-pci_access_mutex); return; } diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index caff8d1..a0d26f0 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -799,6 +799,12 @@ typedef void (*MPT2SAS_FLUSH_RUNNING_CMDS)(struct MPT2SAS_ADAPTER *ioc); * @delayed_tr_list: target reset link list * @delayed_tr_volume_list: volume target reset link list * @@temp_sensors_count: flag to carry the number of temperature sensors + * @pci_access_mutex: Mutex to synchronize ioctl,sysfs show path and + * pci resource handling. PCI resource freeing will lead to free + * vital hardware/memory resource, which might be in use by cli/sysfs + * path functions resulting in Null pointer reference followed by kernel + * crash. To avoid the above race condition we use mutex syncrhonization + * which ensures the syncrhonization between cli/sysfs_show path */ struct MPT2SAS_ADAPTER { struct list_head list; @@ -1015,6 +1021,7 @@ struct MPT2SAS_ADAPTER { u8 mfg_pg10_hide_flag; u8 hide_drives; + struct mutex pci_access_mutex; }; typedef u8 (*MPT_CALLBACK)(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index, @@ -1023,6 +1030,17 @@ typedef u8 (*MPT_CALLBACK)(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index, /* base shared API */ extern struct list_head mpt2sas_ioc_list; +/* spinlock on list operations over IOCs ++ * Case: when multiple warpdrive cards(IOCs) are in use ++ * Each IOC will added to the ioc list stucture on initialization. ++ * Watchdog