Re: [PATCH] tcmu: allow userspace to reset netlink
On 04/04/2018 09:38 PM, Xiubo Li wrote: > On 2018/4/5 8:47, Mike Christie wrote: >> On 04/02/2018 06:42 AM, xiu...@redhat.com wrote: >>> From: Xiubo Li>>> >>> This patch adds 1 tcmu attr to reset and complete all the blocked >>> netlink waiting threads. It's used when the userspace daemon like >>> tcmu-runner has crashed or forced to shutdown just before the >>> netlink requests be replied to the kernel, then the netlink requeting >>> threads will get stuck forever. We must reboot the machine to recover >>> from it and by this the rebootng is not a must then. >>> >>> The netlink reset operation should be done before the userspace daemon >>> could receive and handle the netlink requests to be safe. >>> >>> Signed-off-by: Xiubo Li >>> --- >>> drivers/target/target_core_user.c | 99 >>> --- >>> 1 file changed, 93 insertions(+), 6 deletions(-) >>> >>> diff --git a/drivers/target/target_core_user.c >>> b/drivers/target/target_core_user.c >>> index 4ad89ea..dc8879d 100644 >>> --- a/drivers/target/target_core_user.c >>> +++ b/drivers/target/target_core_user.c >>> @@ -103,9 +103,13 @@ struct tcmu_hba { >>> #define TCMU_CONFIG_LEN 256 >>> +static spinlock_t nl_complete_lock; >>> +static struct idr complete_wait_udevs = IDR_INIT; >>> + >>> struct tcmu_nl_cmd { >>> /* wake up thread waiting for reply */ >>> -struct completion complete; >>> +bool complete; >>> + >>> int cmd; >>> int status; >>> }; >>> @@ -159,12 +163,17 @@ struct tcmu_dev { >>> spinlock_t nl_cmd_lock; >>> struct tcmu_nl_cmd curr_nl_cmd; >>> -/* wake up threads waiting on curr_nl_cmd */ >>> +/* wake up threads waiting on nl_cmd_wq */ >>> wait_queue_head_t nl_cmd_wq; >>> +/* complete thread waiting complete_wq */ >>> +wait_queue_head_t complete_wq; >>> + >>> char dev_config[TCMU_CONFIG_LEN]; >>> int nl_reply_supported; >>> + >>> +uint32_t dev_id; >>> }; >>> #define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev, >>> se_dev) >>> @@ -251,6 +260,56 @@ static int tcmu_get_global_max_data_area(char >>> *buffer, >>>"Max MBs allowed to be allocated to all the tcmu device's " >>>"data areas."); >>> +static void tcmu_complete_wake_up(struct tcmu_dev *udev) >>> +{ >>> +struct tcmu_nl_cmd *nl_cmd = >curr_nl_cmd; >>> + >>> +spin_lock(_complete_lock); >>> +nl_cmd->complete = true; >>> +wake_up(>complete_wq); >>> +spin_unlock(_complete_lock); >>> +} >>> + >>> +static void tcmu_complete_wake_up_all(void) >>> +{ >>> +struct tcmu_nl_cmd *nl_cmd; >>> +struct tcmu_dev *udev; >>> +int i; >>> + >>> +spin_lock(_complete_lock); >>> +idr_for_each_entry(_wait_udevs, udev, i) { >>> +nl_cmd = >curr_nl_cmd; >>> +nl_cmd->complete = true; >>> +wake_up(>complete_wq); >>> +} >>> +spin_unlock(_complete_lock); >>> +} >>> + >>> +static int tcmu_complete_wait(struct tcmu_dev *udev) >>> +{ >>> +struct tcmu_nl_cmd *nl_cmd = >curr_nl_cmd; >>> +uint32_t dev_id; >>> + >>> +spin_lock(_complete_lock); >>> +dev_id = idr_alloc(_wait_udevs, udev, 1, USHRT_MAX, >>> GFP_NOWAIT); >>> +if (dev_id < 0) { >>> +pr_err("tcmu: Could not allocate dev id.\n"); >>> +return dev_id; >>> +} >>> +udev->dev_id = dev_id; >> dev_id is never used. > It will be used when the device is being removed. Ah yeah, you are right. Bad comment on my part. I was thinking/writing wrt if we used a list or helper around devices_idr. > >> I think if you just wanted to loop over all the devices you could just >> use a list. >> >> Or, >> >> Just add a helper around target_core_device.c:devices_idr that just >> gives you the tcmu devices. >> >> >> >>> +spin_unlock(_complete_lock); >>> + >>> +pr_debug("sleeping for nl reply\n"); >>> +wait_event(udev->complete_wq, nl_cmd->complete); >> I don't think you will need the complete field then or this function. >> >> >>> + >>> +spin_lock(_complete_lock); >>> +nl_cmd->complete = false; >>> +idr_remove(_wait_udevs, dev_id); >>> +spin_unlock(_complete_lock); >>> + >>> +return 0; >>> +} >>> + >>> /* multicast group */ >>> enum tcmu_multicast_groups { >>> TCMU_MCGRP_CONFIG, >>> @@ -311,7 +370,7 @@ static int tcmu_genl_cmd_done(struct genl_info >>> *info, int completed_cmd) >>> if (!is_removed) >>>target_undepend_item(>dev_group.cg_item); >>> if (!ret) >>> -complete(_cmd->complete); >>> +tcmu_complete_wake_up(udev); >>> return ret; >>> } >>> @@ -1258,6 +1317,7 @@ static struct se_device >>> *tcmu_alloc_device(struct se_hba *hba, const char *name) >>> timer_setup(>cmd_timer, tcmu_cmd_timedout, 0); >>> init_waitqueue_head(>nl_cmd_wq); >>> +init_waitqueue_head(>complete_wq); >>> spin_lock_init(>nl_cmd_lock); >>> INIT_RADIX_TREE(>data_blocks, GFP_KERNEL);
Re: [PATCH] tcmu: allow userspace to reset netlink
On 2018/4/5 8:47, Mike Christie wrote: On 04/02/2018 06:42 AM, xiu...@redhat.com wrote: From: Xiubo LiThis patch adds 1 tcmu attr to reset and complete all the blocked netlink waiting threads. It's used when the userspace daemon like tcmu-runner has crashed or forced to shutdown just before the netlink requests be replied to the kernel, then the netlink requeting threads will get stuck forever. We must reboot the machine to recover from it and by this the rebootng is not a must then. The netlink reset operation should be done before the userspace daemon could receive and handle the netlink requests to be safe. Signed-off-by: Xiubo Li --- drivers/target/target_core_user.c | 99 --- 1 file changed, 93 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 4ad89ea..dc8879d 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -103,9 +103,13 @@ struct tcmu_hba { #define TCMU_CONFIG_LEN 256 +static spinlock_t nl_complete_lock; +static struct idr complete_wait_udevs = IDR_INIT; + struct tcmu_nl_cmd { /* wake up thread waiting for reply */ - struct completion complete; + bool complete; + int cmd; int status; }; @@ -159,12 +163,17 @@ struct tcmu_dev { spinlock_t nl_cmd_lock; struct tcmu_nl_cmd curr_nl_cmd; - /* wake up threads waiting on curr_nl_cmd */ + /* wake up threads waiting on nl_cmd_wq */ wait_queue_head_t nl_cmd_wq; + /* complete thread waiting complete_wq */ + wait_queue_head_t complete_wq; + char dev_config[TCMU_CONFIG_LEN]; int nl_reply_supported; + + uint32_t dev_id; }; #define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev, se_dev) @@ -251,6 +260,56 @@ static int tcmu_get_global_max_data_area(char *buffer, "Max MBs allowed to be allocated to all the tcmu device's " "data areas."); +static void tcmu_complete_wake_up(struct tcmu_dev *udev) +{ + struct tcmu_nl_cmd *nl_cmd = >curr_nl_cmd; + + spin_lock(_complete_lock); + nl_cmd->complete = true; + wake_up(>complete_wq); + spin_unlock(_complete_lock); +} + +static void tcmu_complete_wake_up_all(void) +{ + struct tcmu_nl_cmd *nl_cmd; + struct tcmu_dev *udev; + int i; + + spin_lock(_complete_lock); + idr_for_each_entry(_wait_udevs, udev, i) { + nl_cmd = >curr_nl_cmd; + nl_cmd->complete = true; + wake_up(>complete_wq); + } + spin_unlock(_complete_lock); +} + +static int tcmu_complete_wait(struct tcmu_dev *udev) +{ + struct tcmu_nl_cmd *nl_cmd = >curr_nl_cmd; + uint32_t dev_id; + + spin_lock(_complete_lock); + dev_id = idr_alloc(_wait_udevs, udev, 1, USHRT_MAX, GFP_NOWAIT); + if (dev_id < 0) { + pr_err("tcmu: Could not allocate dev id.\n"); + return dev_id; + } + udev->dev_id = dev_id; dev_id is never used. It will be used when the device is being removed. I think if you just wanted to loop over all the devices you could just use a list. Or, Just add a helper around target_core_device.c:devices_idr that just gives you the tcmu devices. + spin_unlock(_complete_lock); + + pr_debug("sleeping for nl reply\n"); + wait_event(udev->complete_wq, nl_cmd->complete); I don't think you will need the complete field then or this function. + + spin_lock(_complete_lock); + nl_cmd->complete = false; + idr_remove(_wait_udevs, dev_id); + spin_unlock(_complete_lock); + + return 0; +} + /* multicast group */ enum tcmu_multicast_groups { TCMU_MCGRP_CONFIG, @@ -311,7 +370,7 @@ static int tcmu_genl_cmd_done(struct genl_info *info, int completed_cmd) if (!is_removed) target_undepend_item(>dev_group.cg_item); if (!ret) - complete(_cmd->complete); + tcmu_complete_wake_up(udev); return ret; } @@ -1258,6 +1317,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) timer_setup(>cmd_timer, tcmu_cmd_timedout, 0); init_waitqueue_head(>nl_cmd_wq); + init_waitqueue_head(>complete_wq); spin_lock_init(>nl_cmd_lock); INIT_RADIX_TREE(>data_blocks, GFP_KERNEL); @@ -1462,7 +1522,11 @@ static void tcmu_dev_call_rcu(struct rcu_head *p) kfree(udev->uio_info.name); kfree(udev->name); + + spin_lock(_complete_lock); + idr_remove(_wait_udevs, udev->dev_id); kfree(udev); + spin_unlock(_complete_lock); } static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd) @@ -1555,7 +1619,6 @@ static void tcmu_init_genl_cmd_reply(struct tcmu_dev *udev, int cmd) memset(nl_cmd, 0, sizeof(*nl_cmd));
Re: [PATCH v2] Fix DID_OK handling in __scsi_error_from_host_byte()
On Wed, 2018-04-04 at 10:53 -0700, Bart Van Assche wrote: > Commit e39a97353e53 modified __scsi_error_from_host_byte() such > that that function translates DID_OK into BLK_STS_OK. However, > the description of that commit is wrong: it mentions that commit > 2a842acab109 introduced a bug in __scsi_error_from_host_byte() > although that commit did not change the behavior of that function. > Additionally, commit e39a97353e53 introduced a severe bug: it causes > commands that fail with hostbyte=DID_OK and driverbyte=DRIVER_SENSE > to be completed with BLK_STS_OK. Fix __scsi_error_from_host_byte() > by only translating good status values into BLK_STS_OK. > > Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in > __scsi_error_from_host_byte()") > Reported-by: Damien Le Moal> Signed-off-by: Bart Van Assche > Cc: Hannes Reinecke > Cc: Douglas Gilbert > Cc: Damien Le Moal > Cc: Christoph Hellwig > Cc: sta...@vger.kernel.org > --- > > Changes compared to v1: > - Modified __scsi_error_from_host_byte() such that it again returns > BLK_STS_OK for CONDITION MET and other result codes that represent > success. > > drivers/scsi/scsi_lib.c | 8 +++- > 1 file changed, 7 insertions(+), 1 deletion(-) > > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c > index 74a39db57d49..1496b34af409 100644 > --- a/drivers/scsi/scsi_lib.c > +++ b/drivers/scsi/scsi_lib.c > @@ -736,7 +736,13 @@ static blk_status_t __scsi_error_from_host_byte(struct > scsi_cmnd *cmd, > { > switch (host_byte(result)) { > case DID_OK: > - return BLK_STS_OK; > + /* > + * Also check the other bytes than the status byte in > result > + * to handle the case when a SCSI LLD sets result to > + * DRIVER_SENSE << 24 without setting > SAM_STAT_CHECK_CONDITION. > + */ > + return scsi_status_is_good(result) && (result & ~0xff) == 0 > ? > + BLK_STS_OK : BLK_STS_IOERR; This fixes the problem on my system. Tested-by: Damien Le Moal -- Damien Le Moal Western Digital
Re: [PATCH] tcmu: allow userspace to reset netlink
On 04/02/2018 06:42 AM, xiu...@redhat.com wrote: > From: Xiubo Li> > This patch adds 1 tcmu attr to reset and complete all the blocked > netlink waiting threads. It's used when the userspace daemon like > tcmu-runner has crashed or forced to shutdown just before the > netlink requests be replied to the kernel, then the netlink requeting > threads will get stuck forever. We must reboot the machine to recover > from it and by this the rebootng is not a must then. > > The netlink reset operation should be done before the userspace daemon > could receive and handle the netlink requests to be safe. > > Signed-off-by: Xiubo Li > --- > drivers/target/target_core_user.c | 99 > --- > 1 file changed, 93 insertions(+), 6 deletions(-) > > diff --git a/drivers/target/target_core_user.c > b/drivers/target/target_core_user.c > index 4ad89ea..dc8879d 100644 > --- a/drivers/target/target_core_user.c > +++ b/drivers/target/target_core_user.c > @@ -103,9 +103,13 @@ struct tcmu_hba { > > #define TCMU_CONFIG_LEN 256 > > +static spinlock_t nl_complete_lock; > +static struct idr complete_wait_udevs = IDR_INIT; > + > struct tcmu_nl_cmd { > /* wake up thread waiting for reply */ > - struct completion complete; > + bool complete; > + > int cmd; > int status; > }; > @@ -159,12 +163,17 @@ struct tcmu_dev { > > spinlock_t nl_cmd_lock; > struct tcmu_nl_cmd curr_nl_cmd; > - /* wake up threads waiting on curr_nl_cmd */ > + /* wake up threads waiting on nl_cmd_wq */ > wait_queue_head_t nl_cmd_wq; > > + /* complete thread waiting complete_wq */ > + wait_queue_head_t complete_wq; > + > char dev_config[TCMU_CONFIG_LEN]; > > int nl_reply_supported; > + > + uint32_t dev_id; > }; > > #define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev, se_dev) > @@ -251,6 +260,56 @@ static int tcmu_get_global_max_data_area(char *buffer, >"Max MBs allowed to be allocated to all the tcmu device's " >"data areas."); > > +static void tcmu_complete_wake_up(struct tcmu_dev *udev) > +{ > + struct tcmu_nl_cmd *nl_cmd = >curr_nl_cmd; > + > + spin_lock(_complete_lock); > + nl_cmd->complete = true; > + wake_up(>complete_wq); > + spin_unlock(_complete_lock); > +} > + > +static void tcmu_complete_wake_up_all(void) > +{ > + struct tcmu_nl_cmd *nl_cmd; > + struct tcmu_dev *udev; > + int i; > + > + spin_lock(_complete_lock); > + idr_for_each_entry(_wait_udevs, udev, i) { > + nl_cmd = >curr_nl_cmd; > + nl_cmd->complete = true; > + wake_up(>complete_wq); > + } > + spin_unlock(_complete_lock); > +} > + > +static int tcmu_complete_wait(struct tcmu_dev *udev) > +{ > + struct tcmu_nl_cmd *nl_cmd = >curr_nl_cmd; > + uint32_t dev_id; > + > + spin_lock(_complete_lock); > + dev_id = idr_alloc(_wait_udevs, udev, 1, USHRT_MAX, > GFP_NOWAIT); > + if (dev_id < 0) { > + pr_err("tcmu: Could not allocate dev id.\n"); > + return dev_id; > + } > + udev->dev_id = dev_id; dev_id is never used. I think if you just wanted to loop over all the devices you could just use a list. Or, Just add a helper around target_core_device.c:devices_idr that just gives you the tcmu devices. > + spin_unlock(_complete_lock); > + > + pr_debug("sleeping for nl reply\n"); > + wait_event(udev->complete_wq, nl_cmd->complete); I don't think you will need the complete field then or this function. > + > + spin_lock(_complete_lock); > + nl_cmd->complete = false; > + idr_remove(_wait_udevs, dev_id); > + spin_unlock(_complete_lock); > + > + return 0; > +} > + > /* multicast group */ > enum tcmu_multicast_groups { > TCMU_MCGRP_CONFIG, > @@ -311,7 +370,7 @@ static int tcmu_genl_cmd_done(struct genl_info *info, int > completed_cmd) > if (!is_removed) >target_undepend_item(>dev_group.cg_item); > if (!ret) > - complete(_cmd->complete); > + tcmu_complete_wake_up(udev); > return ret; > } > > @@ -1258,6 +1317,7 @@ static struct se_device *tcmu_alloc_device(struct > se_hba *hba, const char *name) > timer_setup(>cmd_timer, tcmu_cmd_timedout, 0); > > init_waitqueue_head(>nl_cmd_wq); > + init_waitqueue_head(>complete_wq); > spin_lock_init(>nl_cmd_lock); > > INIT_RADIX_TREE(>data_blocks, GFP_KERNEL); > @@ -1462,7 +1522,11 @@ static void tcmu_dev_call_rcu(struct rcu_head *p) > > kfree(udev->uio_info.name); > kfree(udev->name); > + > + spin_lock(_complete_lock); > + idr_remove(_wait_udevs, udev->dev_id); > kfree(udev); > + spin_unlock(_complete_lock); > } > > static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd) > @@ -1555,7 +1619,6 @@ static void tcmu_init_genl_cmd_reply(struct tcmu_dev > *udev,
Re: 4.15.14 crash with iscsi target and dvd
Bart Van Assche wrote: > On Sun, 2018-04-01 at 14:27 -0400, Wakko Warner wrote: > > Wakko Warner wrote: > > > Wakko Warner wrote: > > > > I tested 4.14.32 last night with the same oops. 4.9.91 works fine. > > > > From the initiator, if I do cat /dev/sr1 > /dev/null it works. If I > > > > mount > > > > /dev/sr1 and then do find -type f | xargs cat > /dev/null the target > > > > crashes. I'm using the builtin iscsi target with pscsi. I can burn > > > > from > > > > the initiator with out problems. I'll test other kernels between 4.9 > > > > and > > > > 4.14. > > > > > > So I've tested 4.x.y where x one of 10 11 12 14 15 and y is the latest > > > patch > > > (except for 4.15 which was 1 behind) > > > Each of these kernels crash within seconds or immediate of doing find > > > -type > > > f | xargs cat > /dev/null from the initiator. > > > > I tried 4.10.0. It doesn't completely lockup the system, but the device > > that was used hangs. So from the initiator, it's /dev/sr1 and from the > > target it's /dev/sr0. Attempting to read /dev/sr0 after the oops causes the > > process to hang in D state. > > Hello Wakko, > > Thank you for having narrowed down this further. I think that you encountered > a regression either in the block layer core or in the SCSI core. Unfortunately > the number of changes between kernel versions v4.9 and v4.10 in these two > subsystems is huge. I see two possible ways forward: > - Either that you perform a bisect to identify the patch that introduced this > regression. However, I'm not sure whether you are familiar with the bisect > process. > - Or that you identify the command that triggers this crash such that others > can reproduce this issue without needing access to your setup. > > How about reproducing this crash with the below patch applied on top of > kernel v4.15.x? The additional output sent by this patch to the system log > should allow us to reproduce this issue by submitting the same SCSI command > with sg_raw. Sorry for not getting back in touch. My internet was down. I haven't tried the patch yet. I'll try to get to that tomorrow. The system with the issue is busy and I can't reboot it right now.
Re: [PATCH 2/2] sd_zbc: Avoid errors due to sd_zbc_setup() execution
Bart, On 4/5/18 00:22, Bart Van Assche wrote: > On Wed, 2018-04-04 at 17:54 +0900, Damien Le Moal wrote: >> Since SCSI scanning occurs asynchronously, since sd_revalidate_disk() >> is called from sd_probe_async() and since sd_revalidate_disk() calls >> sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called >> concurrently with operations referencing a drive zone bitmaps and number > > > Should "a" be changed into "the"? Yes. >> [Damien] Updated commit message and changed nr_zones/bitmap swap order. > > Updating the number of zones after having updated the bitmap pointers is not > sufficient to avoid trouble if the number of zones as reported by the drive > changes while I/O is in progress. With the current implementation if the > number of zones changes the seq_zones_bitmap is cleared. Can this cause > trouble for the mq-deadline scheduler? Additionally, CPUs other than x86 can > reorder store operations. Even worse, a CPU could cache the zone bitmap > pointers which means that at least RCU protection + kfree_rcu() is needed to > avoid trouble. I think we either should handle this case properly or issue a > kernel warning. OK. Let's work on that. -- Damien Le Moal, Western Digital
Re: [PATCH 1/2] sd_zbc: Avoid errors due to sd_zbc_check_zone_size() execution
Bart, On 4/5/18 00:08, Bart Van Assche wrote: > On 04/04/18 01:54, Damien Le Moal wrote: >> static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) >> { >> +u64 sdkp_zone_blocks = sdkp->zone_blocks; > > Shouldn't this variable be initialized to zero such that zone size > changes are accepted even if the SAME field in the REPORT ZONES response > is zero? sdkp_zone_blocks will be 0 when sd_zbc_check_zone_size() is called for the first scan of a disk and will hold the current disk value if sd_zbc_check_zone_size() is called on a revalidate after first scan. If the initial value is 0, there is no check and the variable is first initialized. Otherwise, the value is compared to the zone size reported. In both cases, the zone size change will be cought. But granted, setting the value initially to 0 is easier to understand. I will also change: } else { sdkp->zone_blocks = zone_blocks; sdkp->zone_shift = ilog2(zone_blocks); } to } else if (sdkp->zone_blocks != zone_blocks) { sdkp->zone_blocks = zone_blocks; sdkp->zone_shift = ilog2(zone_blocks); } to make things really clear. Similarly to a capacity change, It may also be good to add a warning message. After all, on a drive swap, we can have the case where the capacity does not change, but the zone size does. Sending a v2. Best regards. -- Damien Le Moal, Western Digital
[GIT PULL] first round of SCSI updates for the 4.16+ merge window
This is mostly updates of the usual drivers: arcmsr, qla2xx, lpfc, ufs, mpt3sas, hisi_sas. In addition we have removed several really old drivers: sym53c416, NCR53c406a, fdomain, fdomain_cs and removed the old scsi_module.c initialization from all remaining drivers. Plus an assortment of bug fixes, initialization errors and other minor fixes. This time there's a really nasty merge between the fixes and misc branches of the SCSI tree which I've resolved in the merge commit: the non obvious part is that you have to remove some lines from qla2xxx/qla_gs.c to avoid a compile failure which would cause bisection problems, so I've done that and documented it in the merge commit. If you'd like to do it yourself, let me know and I'll send the two branches separately. The patch is available here: git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git scsi-for-linus The short changelog is: Adrian Hunter (1): scsi: ufs: Add support for Auto-Hibernate Idle Timer Arnd Bergmann (7): scsi: mpt3sas: clarify mmio pointer types scsi: qedi: fix build regression scsi: sym53c416: avoid section mismatch with LTO scsi: NCR53c406a: avoid section mismatch with LTO scsi: qedf: use correct strncpy() size scsi: qedf: fix LTO-enabled build scsi: qedi: fix building with LTO Bart Van Assche (15): scsi: ufs: Fix kernel-doc errors and warnings scsi: sd_zbc: Fix sd_zbc_get_seq_zones() kernel-doc header scsi: libsas: Fix kernel-doc headers scsi: core: Reduce number of scsi_test_unit_ready() retries scsi: core: Move the eh_deadline module parameter definition scsi: core: scmd_eh_abort_handler(): Add a comment scsi: pmcraid: Use sgl_alloc_order() and sgl_free_order() scsi: pmcraid: Remove an unused structure member scsi: ipr: Use sgl_alloc_order() and sgl_free_order() scsi: scsi_debug: Simplify request tag decoding scsi: qla2xxx: Fix function argument descriptions scsi: qla4xxx: Move an array from a .h into a .c file scsi: qla4xxx: Remove unused symbols scsi: qla2xxx: Remove unused symbols scsi: qla2xxx: Use %p for printing pointers Ching Huang (4): scsi: arcmsr: Change driver version to v1.40.00.05-20180309 scsi: arcmsr: Sleep to avoid CPU stuck too long for waiting adapter ready scsi: arcmsr: Handle adapter removed due to thunderbolt cable disconnection. scsi: arcmsr: Rename ACB_F_BUS_HANG_ON to ACB_F_ADAPTER_REMOVED for adapter hot-plug Christoph Hellwig (11): scsi: remove the old scsi_module.c initialization model scsi: remove the sym53c416 driver scsi: remove the NCR53c406a driver scsi: remove the fdomain and fdomain_cs drivers scsi: mvme147: stop using scsi_module.c scsi: esas2r: remove initialization / cleanup dead wood scsi: core: unexport scsi_host_set_state scsi: documentation: remove ChangeLog.1992-1997 scsi: aha1740: stop using scsi_unregister scsi: ips: don't set .detect and .release in the host template scsi: dpt_i2o: stop using scsi_unregister Colin Ian King (7): scsi: qla2xxx: fix spelling mistake: "existant" -> "existent" scsi: lpfc: make several unions static, fix non-ANSI prototype scsi: scsi_transport_spi: make two const arrays static, shrinks object size scsi: pmcraid: remove redundant initializations of pointer 'ioadl' scsi: isci: remove redundant initialization to 'bit' scsi: libfc: remove redundant initialization of 'disc' scsi: qedf: remove redundant initialization of 'fcport' Dan Carpenter (3): scsi: dpt_i2o: use after free in __adpt_reset() scsi: dpt_i2o: use after free in adpt_release() scsi: atp870u: 64 bit bug in atp885_init() Darren Trapp (10): scsi: qla2xxx: Cleanup code to improve FC-NVMe error handling scsi: qla2xxx: Fix FC-NVMe IO abort during driver reset scsi: qla2xxx: Fix retry for PRLI RJT with reason of BUSY scsi: qla2xxx: Remove nvme_done_list scsi: qla2xxx: Return busy if rport going away scsi: qla2xxx: Fix n2n_ae flag to prevent dev_loss on PDB change scsi: qla2xxx: Add FC-NVMe abort processing scsi: qla2xxx: Add changes for devloss timeout in driver scsi: qla2xxx: Set IIDMA and fcport state before qla_nvme_register_remote() scsi: qla2xxx: Restore ZIO threshold setting Don Brace (1): scsi: smartpqi: update driver version Douglas Gilbert (2): scsi: core: Make SCSI Status CONDITION MET equivalent to GOOD scsi: scsi_debug: implement IMMED bit Finn Thain (1): scsi: jazz_esp, sun3x_esp: Pass struct device pointer in dma calls Geert Uytterhoeven (1): scsi: hisi_sas: Remove depends on HAS_DMA in case of platform dependency Hannes Reinecke (1): scsi: raid_class: Add 'JBOD' RAID level James Smart (43): scsi: lpfc: Change Copyright of 12.0.0.1 modified files to 2018 scsi: lpfc: update driver
RE: aacraid code passes GFP_DMA32 to kmalloc this will not work
> -Original Message- > From: Hans de Goede [mailto:hdego...@redhat.com] > Sent: Thursday, March 29, 2018 8:51 AM > To: dl-esc-Aacraid Linux Driver; James E.J. > Bottomley ; Martin K. Petersen > ; SCSI development list s...@vger.kernel.org>; Linux Kernel Mailing List > > Subject: aacraid code passes GFP_DMA32 to kmalloc this will not work > > > Hi All, > > Since I made the same mistake myself I've done a quick grep for > GFP_DMA32 in the kernel and drivers/scsi/aacraid/commctrl.c came up as a > result of this grep, it does: > > p = kmalloc(sg_count[i], > GFP_KERNEL|GFP_DMA32); > > But kmalloc always returns memory from the normal memory-zone, if you need > memory from a specific memory-zone like the > DMA32 zone, you must use the dma allocation functions (which from a quick > glance at the code seems appropriate here) or directly call alloc_page or > __get_free_page. Hi Hans, I did run into this issue myself recently ... Thanks for a reminder to send in a patch to correct this. -Dave > > Regards, > > Hans
Re: usercopy whitelist woe in scsi_sense_cache
Hi. 04.04.2018 23:25, Kees Cook wrote: Actually, I can trigger a BUG too: [ 129.259213] usercopy: Kernel memory exposure attempt detected from SLUB object 'scsi_sense_cache' (offset 119, size 22)! Wow, yeah, that's totally outside the slub object_size. How did you trigger this? Just luck or something specific? Just luck, I suppose. It usually comes after the first warning if you wait long enough (maybe, a couple of extra minutes). To give you an idea regarding variety of offsets, I've summarised kernel log from the server: $ sudo journalctl -kb | grep "Kernel memory exposure attempt detected" | grep -oE 'offset [0-9]+, size [0-9]+' | sort | uniq -c 9 offset 107, size 22 6 offset 108, size 22 8 offset 109, size 22 7 offset 110, size 22 5 offset 111, size 22 5 offset 112, size 22 2 offset 113, size 22 2 offset 114, size 22 1 offset 115, size 22 1 offset 116, size 22 1 offset 119, size 22 1 offset 85, size 22 I'd really like to understand how the buffer position can be changing... I'd expect that to break all kinds of things (i.e. free()ing the slab later would break too...) I haven't checked the code yet, but the first thing that comes to my mind is some uninitialised variable. Just guessing here, though. Thanks for the report! I hope someone more familiar with sg_io() can help explain the changing buffer offset... :P Hopefully, SCSI people are Cc'ed here properly… Thanks! Regards, Oleksandr
Re: usercopy whitelist woe in scsi_sense_cache
On Wed, Apr 4, 2018 at 1:49 PM, Oleksandr Natalenkowrote: > Hi. > > On středa 4. dubna 2018 22:21:53 CEST Kees Cook wrote: >> >> ... >> That means scsi_sense_cache should be 96 bytes in size? But a 22 byte >> read starting at offset 94 happened? That seems like a 20 byte read >> beyond the end of the SLUB object? Though if it were reading past the >> actual end of the object, I'd expect the hardened usercopy BUG (rather >> than the WARN) to kick in. Ah, it looks like >> /sys/kernel/slab/scsi_sense_cache/slab_size shows this to be 128 bytes >> of actual allocation, so the 20 bytes doesn't strictly overlap another >> object (hence no BUG): >> ... > > > Actually, I can trigger a BUG too: > > [ 129.259213] usercopy: Kernel memory exposure attempt detected from SLUB > object 'scsi_sense_cache' (offset 119, size 22)! Wow, yeah, that's totally outside the slub object_size. How did you trigger this? Just luck or something specific? > [ 129.265167] [ cut here ] > [ 129.267579] kernel BUG at mm/usercopy.c:100! > > And also offset can be different, as you may see: > > [ 55.993224] Bad or missing usercopy whitelist? Kernel memory exposure > attempt detected from SLUB object 'scsi_sense_cache' (offset 76, size 22)! > [ 55.998678] WARNING: CPU: 0 PID: 1305 at mm/usercopy.c:81 usercopy_warn > +0x7e/0xa0 > > It looks like only the size stays the same. I'd really like to understand how the buffer position can be changing... I'd expect that to break all kinds of things (i.e. free()ing the slab later would break too...) >> Can you send me your .config? What SCSI drivers are you using in the >> VM and on the real server? > > This is an Arch kernel with a config available here [1]. > > For both server and VM "lspci -vv" shows "ahci" in use. Is this what you are > asking for? I think so, yeah. >> Are you able to see what ioctl()s smartctl is issuing? I'll try to >> reproduce this on my end... > > As per [2], strace shows "SG_IO" requests. Is this detailed enough? That's useful, yeah. I'll try Douglas's suggestion of "smartctl -r scsiioctl,3 ..." too. > Thanks for looking into it. Thanks for the report! I hope someone more familiar with sg_io() can help explain the changing buffer offset... :P -Kees -- Kees Cook Pixel Security
Re: usercopy whitelist woe in scsi_sense_cache
Hi. On středa 4. dubna 2018 22:21:53 CEST Kees Cook wrote: ... That means scsi_sense_cache should be 96 bytes in size? But a 22 byte read starting at offset 94 happened? That seems like a 20 byte read beyond the end of the SLUB object? Though if it were reading past the actual end of the object, I'd expect the hardened usercopy BUG (rather than the WARN) to kick in. Ah, it looks like /sys/kernel/slab/scsi_sense_cache/slab_size shows this to be 128 bytes of actual allocation, so the 20 bytes doesn't strictly overlap another object (hence no BUG): ... Actually, I can trigger a BUG too: [ 129.259213] usercopy: Kernel memory exposure attempt detected from SLUB object 'scsi_sense_cache' (offset 119, size 22)! [ 129.265167] [ cut here ] [ 129.267579] kernel BUG at mm/usercopy.c:100! And also offset can be different, as you may see: [ 55.993224] Bad or missing usercopy whitelist? Kernel memory exposure attempt detected from SLUB object 'scsi_sense_cache' (offset 76, size 22)! [ 55.998678] WARNING: CPU: 0 PID: 1305 at mm/usercopy.c:81 usercopy_warn +0x7e/0xa0 It looks like only the size stays the same. Can you send me your .config? What SCSI drivers are you using in the VM and on the real server? This is an Arch kernel with a config available here [1]. For both server and VM "lspci -vv" shows "ahci" in use. Is this what you are asking for? Are you able to see what ioctl()s smartctl is issuing? I'll try to reproduce this on my end... As per [2], strace shows "SG_IO" requests. Is this detailed enough? Thanks for looking into it. Regards, Oleksandr [1] https://git.archlinux.org/svntogit/packages.git/plain/trunk/config? h=packages/linux=d7625be23f83416491d202d5cea96e5a871fb216 [2] https://gist.github.com/6f58f8891468aeba1ab2cc9f45668735
Re: usercopy whitelist woe in scsi_sense_cache
On 2018-04-04 04:32 PM, Kees Cook wrote: On Wed, Apr 4, 2018 at 12:07 PM, Oleksandr Natalenkowrote: [ 261.262135] Bad or missing usercopy whitelist? Kernel memory exposure attempt detected from SLUB object 'scsi_sense_cache' (offset 94, size 22)! I can easily reproduce it with a qemu VM and 2 virtual SCSI disks by calling smartctl in a loop and doing some usual background I/O. The warning is triggered within 3 minutes or so (not instantly). Also: Can you send me your .config? What SCSI drivers are you using in the VM and on the real server? Are you able to see what ioctl()s smartctl is issuing? I'll try to reproduce this on my end... smartctl -r scsiioctl,3
Re: usercopy whitelist woe in scsi_sense_cache
On 2018-04-04 04:21 PM, Kees Cook wrote: On Wed, Apr 4, 2018 at 12:07 PM, Oleksandr Natalenkowrote: With v4.16 I get the following dump while using smartctl: [...] [ 261.262135] Bad or missing usercopy whitelist? Kernel memory exposure attempt detected from SLUB object 'scsi_sense_cache' (offset 94, size 22)! [...] [ 261.345976] Call Trace: [ 261.350620] __check_object_size+0x130/0x1a0 [ 261.355775] sg_io+0x269/0x3f0 [ 261.360729] ? path_lookupat+0xaa/0x1f0 [ 261.364027] ? current_time+0x18/0x70 [ 261.366684] scsi_cmd_ioctl+0x257/0x410 [ 261.369871] ? xfs_bmapi_read+0x1c3/0x340 [xfs] [ 261.372231] sd_ioctl+0xbf/0x1a0 [sd_mod] [ 261.375456] blkdev_ioctl+0x8ca/0x990 [ 261.381156] ? read_null+0x10/0x10 [ 261.384984] block_ioctl+0x39/0x40 [ 261.388739] do_vfs_ioctl+0xa4/0x630 [ 261.392624] ? vfs_write+0x164/0x1a0 [ 261.396658] SyS_ioctl+0x74/0x80 [ 261.399563] do_syscall_64+0x74/0x190 [ 261.402685] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 This is: sg_io+0x269/0x3f0: blk_complete_sghdr_rq at block/scsi_ioctl.c:280 (inlined by) sg_io at block/scsi_ioctl.c:376 which is: if (req->sense_len && hdr->sbp) { int len = min((unsigned int) hdr->mx_sb_len, req->sense_len); if (!copy_to_user(hdr->sbp, req->sense, len)) hdr->sb_len_wr = len; else ret = -EFAULT; } [...] I can easily reproduce it with a qemu VM and 2 virtual SCSI disks by calling smartctl in a loop and doing some usual background I/O. The warning is triggered within 3 minutes or so (not instantly). Initially, it was produced on my server after a kernel update (because disks are monitored with smartctl via Zabbix). Looks like the thing was introduced with 0afe76e88c57d91ef5697720aed380a339e3df70. Any idea how to deal with this please? If needed, I can provide any additional info, and also I'm happy/ready to test any proposed patches. Interesting, and a little confusing. So, what's strange here is that the scsi_sense_cache already has a full whitelist: kmem_cache_create_usercopy("scsi_sense_cache", SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN, 0, SCSI_SENSE_BUFFERSIZE, NULL); Arg 2 is the buffer size, arg 5 is the whitelist offset (0), and the whitelist size (same as arg2). In other words, the entire buffer should be whitelisted. include/scsi/scsi_cmnd.h says: #define SCSI_SENSE_BUFFERSIZE 96 That means scsi_sense_cache should be 96 bytes in size? But a 22 byte read starting at offset 94 happened? That seems like a 20 byte read beyond the end of the SLUB object? Though if it were reading past the actual end of the object, I'd expect the hardened usercopy BUG (rather than the WARN) to kick in. Ah, it looks like /sys/kernel/slab/scsi_sense_cache/slab_size shows this to be 128 bytes of actual allocation, so the 20 bytes doesn't strictly overlap another object (hence no BUG): /sys/kernel/slab/scsi_sense_cache# grep . object_size usersize slab_size object_size:96 usersize:96 slab_size:128 Ah, right, due to SLAB_HWCACHE_ALIGN, the allocation is rounded up to the next cache line size, so there's 32 bytes of padding to reach 128. James or Martin, is this over-read "expected" behavior? i.e. does the sense cache buffer usage ever pull the ugly trick of silently expanding its allocation into the space the slab allocator has given it? If not, this looks like a real bug. What I don't see is how req->sense is _not_ at offset 0 in the scsi_sense_cache object... Looking at the smartctl SCSI code it pulls 32 byte sense buffers. Can't see 22 anywhere relevant in its code. There are two types of sense: fixed and descriptor: with fixed you seldom need more than 18 bytes (but it can only represent 32 bit LBAs). The other type has a header and 0 or more variable length descriptors. If decoding of descriptor sense went wrong you might end up at offset 94. But not with smartctl Doug Gilbert
Re: usercopy whitelist woe in scsi_sense_cache
On Wed, Apr 4, 2018 at 12:07 PM, Oleksandr Natalenkowrote: > [ 261.262135] Bad or missing usercopy whitelist? Kernel memory exposure > attempt detected from SLUB object 'scsi_sense_cache' (offset 94, size 22)! > I can easily reproduce it with a qemu VM and 2 virtual SCSI disks by calling > smartctl in a loop and doing some usual background I/O. The warning is > triggered within 3 minutes or so (not instantly). Also: Can you send me your .config? What SCSI drivers are you using in the VM and on the real server? Are you able to see what ioctl()s smartctl is issuing? I'll try to reproduce this on my end... -Kees -- Kees Cook Pixel Security
Re: usercopy whitelist woe in scsi_sense_cache
On Wed, Apr 4, 2018 at 12:07 PM, Oleksandr Natalenkowrote: > With v4.16 I get the following dump while using smartctl: > [...] > [ 261.262135] Bad or missing usercopy whitelist? Kernel memory exposure > attempt detected from SLUB object 'scsi_sense_cache' (offset 94, size 22)! > [...] > [ 261.345976] Call Trace: > [ 261.350620] __check_object_size+0x130/0x1a0 > [ 261.355775] sg_io+0x269/0x3f0 > [ 261.360729] ? path_lookupat+0xaa/0x1f0 > [ 261.364027] ? current_time+0x18/0x70 > [ 261.366684] scsi_cmd_ioctl+0x257/0x410 > [ 261.369871] ? xfs_bmapi_read+0x1c3/0x340 [xfs] > [ 261.372231] sd_ioctl+0xbf/0x1a0 [sd_mod] > [ 261.375456] blkdev_ioctl+0x8ca/0x990 > [ 261.381156] ? read_null+0x10/0x10 > [ 261.384984] block_ioctl+0x39/0x40 > [ 261.388739] do_vfs_ioctl+0xa4/0x630 > [ 261.392624] ? vfs_write+0x164/0x1a0 > [ 261.396658] SyS_ioctl+0x74/0x80 > [ 261.399563] do_syscall_64+0x74/0x190 > [ 261.402685] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 This is: sg_io+0x269/0x3f0: blk_complete_sghdr_rq at block/scsi_ioctl.c:280 (inlined by) sg_io at block/scsi_ioctl.c:376 which is: if (req->sense_len && hdr->sbp) { int len = min((unsigned int) hdr->mx_sb_len, req->sense_len); if (!copy_to_user(hdr->sbp, req->sense, len)) hdr->sb_len_wr = len; else ret = -EFAULT; } > [...] > I can easily reproduce it with a qemu VM and 2 virtual SCSI disks by calling > smartctl in a loop and doing some usual background I/O. The warning is > triggered within 3 minutes or so (not instantly). > > Initially, it was produced on my server after a kernel update (because disks > are monitored with smartctl via Zabbix). > > Looks like the thing was introduced with > 0afe76e88c57d91ef5697720aed380a339e3df70. > > Any idea how to deal with this please? If needed, I can provide any additional > info, and also I'm happy/ready to test any proposed patches. Interesting, and a little confusing. So, what's strange here is that the scsi_sense_cache already has a full whitelist: kmem_cache_create_usercopy("scsi_sense_cache", SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN, 0, SCSI_SENSE_BUFFERSIZE, NULL); Arg 2 is the buffer size, arg 5 is the whitelist offset (0), and the whitelist size (same as arg2). In other words, the entire buffer should be whitelisted. include/scsi/scsi_cmnd.h says: #define SCSI_SENSE_BUFFERSIZE 96 That means scsi_sense_cache should be 96 bytes in size? But a 22 byte read starting at offset 94 happened? That seems like a 20 byte read beyond the end of the SLUB object? Though if it were reading past the actual end of the object, I'd expect the hardened usercopy BUG (rather than the WARN) to kick in. Ah, it looks like /sys/kernel/slab/scsi_sense_cache/slab_size shows this to be 128 bytes of actual allocation, so the 20 bytes doesn't strictly overlap another object (hence no BUG): /sys/kernel/slab/scsi_sense_cache# grep . object_size usersize slab_size object_size:96 usersize:96 slab_size:128 Ah, right, due to SLAB_HWCACHE_ALIGN, the allocation is rounded up to the next cache line size, so there's 32 bytes of padding to reach 128. James or Martin, is this over-read "expected" behavior? i.e. does the sense cache buffer usage ever pull the ugly trick of silently expanding its allocation into the space the slab allocator has given it? If not, this looks like a real bug. What I don't see is how req->sense is _not_ at offset 0 in the scsi_sense_cache object... -Kees -- Kees Cook Pixel Security
Re: [PATCH v2] Fix DID_OK handling in __scsi_error_from_host_byte()
On 04/04/2018 10:53 AM, Bart Van Assche wrote: > Commit e39a97353e53 modified __scsi_error_from_host_byte() such > that that function translates DID_OK into BLK_STS_OK. However, > the description of that commit is wrong: it mentions that commit > 2a842acab109 introduced a bug in __scsi_error_from_host_byte() > although that commit did not change the behavior of that function. > Additionally, commit e39a97353e53 introduced a severe bug: it causes > commands that fail with hostbyte=DID_OK and driverbyte=DRIVER_SENSE > to be completed with BLK_STS_OK. Fix __scsi_error_from_host_byte() > by only translating good status values into BLK_STS_OK. > > Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in > __scsi_error_from_host_byte()") > Reported-by: Damien Le Moal> Signed-off-by: Bart Van Assche > Cc: Hannes Reinecke > Cc: Douglas Gilbert > Cc: Damien Le Moal > Cc: Christoph Hellwig > Cc: sta...@vger.kernel.org > --- > > Changes compared to v1: > - Modified __scsi_error_from_host_byte() such that it again returns > BLK_STS_OK for CONDITION MET and other result codes that represent > success. > > drivers/scsi/scsi_lib.c | 8 +++- > 1 file changed, 7 insertions(+), 1 deletion(-) > > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c > index 74a39db57d49..1496b34af409 100644 > --- a/drivers/scsi/scsi_lib.c > +++ b/drivers/scsi/scsi_lib.c > @@ -736,7 +736,13 @@ static blk_status_t __scsi_error_from_host_byte(struct > scsi_cmnd *cmd, > { > switch (host_byte(result)) { > case DID_OK: > - return BLK_STS_OK; > + /* > + * Also check the other bytes than the status byte in result > + * to handle the case when a SCSI LLD sets result to > + * DRIVER_SENSE << 24 without setting SAM_STAT_CHECK_CONDITION. > + */ > + return scsi_status_is_good(result) && (result & ~0xff) == 0 ? > + BLK_STS_OK : BLK_STS_IOERR; > case DID_TRANSPORT_FAILFAST: > return BLK_STS_TRANSPORT; > case DID_TARGET_FAILURE: > Reviewed-by: Lee Duncan
[PATCH] target: change default dbroot to /etc/target
The dbroot (target PR database root directory) is configurable but default to /var/target, a historic value. But the reason for adding configurability was to move the target directory out of /var. This is because the File Hierarchy Standard v3.0 mandates that this "target" directory not be in /var. See https://refspecs.linuxfoundation.org/FHS_3.0/fhs-3.0.pdf This change moves the default from /var/target to /etc/target, but this value is still configurable, so those wishing to continue to use /var/target can still do so. Signed-off-by: Lee Duncan--- drivers/target/target_core_internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 1d5afc3ae017..34eccef975b7 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -165,7 +165,7 @@ extern struct se_portal_group xcopy_pt_tpg; /* target_core_configfs.c */ #define DB_ROOT_LEN4096 -#defineDB_ROOT_DEFAULT "/var/target" +#defineDB_ROOT_DEFAULT "/etc/target" extern char db_root[]; -- 2.13.6
usercopy whitelist woe in scsi_sense_cache
Hi, Kees, David et al. With v4.16 I get the following dump while using smartctl: === [ 261.260617] [ cut here ] [ 261.262135] Bad or missing usercopy whitelist? Kernel memory exposure attempt detected from SLUB object 'scsi_sense_cache' (offset 94, size 22)! [ 261.267672] WARNING: CPU: 2 PID: 27041 at mm/usercopy.c:81 usercopy_warn +0x7e/0xa0 [ 261.273624] Modules linked in: nls_iso8859_1 nls_cp437 vfat fat kvm_intel kvm iTCO_wdt ppdev irqbypass bochs_drm ttm iTCO_vendor_support drm_kms_helper drm psmouse input_leds led_class pcspkr joydev intel_agp parport_pc mousedev evdev syscopyarea intel_gtt i2c_i801 sysfillrect parport rtc_cmos sysimgblt qemu_fw_cfg mac_hid agpgart fb_sys_fops lpc_ich ip_tables x_tables xfs dm_thin_pool dm_persistent_data dm_bio_prison dm_bufio libcrc32c crc32c_generic dm_crypt algif_skcipher af_alg dm_mod raid10 md_mod hid_generic usbhid hid sr_mod cdrom sd_mod crct10dif_pclmul uhci_hcd crc32_pclmul crc32c_intel ghash_clmulni_intel pcbc serio_raw ahci atkbd aesni_intel xhci_pci aes_x86_64 ehci_pci libahci crypto_simd libps2 glue_helper xhci_hcd ehci_hcd libata cryptd usbcore usb_common i8042 serio virtio_scsi scsi_mod [ 261.300752] virtio_blk virtio_net virtio_pci virtio_ring virtio [ 261.305534] CPU: 2 PID: 27041 Comm: smartctl Not tainted 4.16.0-1-ARCH #1 [ 261.309936] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 [ 261.313668] RIP: 0010:usercopy_warn+0x7e/0xa0 [ 261.315653] RSP: 0018:ab5aca6cfc40 EFLAGS: 00010286 [ 261.320038] RAX: RBX: 8e8cd893605e RCX: 0001 [ 261.322215] RDX: 8001 RSI: 83eb4672 RDI: [ 261.325680] RBP: 0016 R08: R09: 0282 [ 261.328462] R10: 83e896b1 R11: 0001 R12: 0001 [ 261.330584] R13: 8e8cd8936074 R14: 8e8cd893605e R15: 0016 [ 261.332748] FS: 7f5a81bdf040() GS:8e8cdf70() knlGS: [ 261.337929] CS: 0010 DS: ES: CR0: 80050033 [ 261.343128] CR2: 7fff3a6790a8 CR3: 18228006 CR4: 00160ee0 [ 261.345976] Call Trace: [ 261.350620] __check_object_size+0x130/0x1a0 [ 261.355775] sg_io+0x269/0x3f0 [ 261.360729] ? path_lookupat+0xaa/0x1f0 [ 261.364027] ? current_time+0x18/0x70 [ 261.366684] scsi_cmd_ioctl+0x257/0x410 [ 261.369871] ? xfs_bmapi_read+0x1c3/0x340 [xfs] [ 261.372231] sd_ioctl+0xbf/0x1a0 [sd_mod] [ 261.375456] blkdev_ioctl+0x8ca/0x990 [ 261.381156] ? read_null+0x10/0x10 [ 261.384984] block_ioctl+0x39/0x40 [ 261.388739] do_vfs_ioctl+0xa4/0x630 [ 261.392624] ? vfs_write+0x164/0x1a0 [ 261.396658] SyS_ioctl+0x74/0x80 [ 261.399563] do_syscall_64+0x74/0x190 [ 261.402685] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 261.414154] RIP: 0033:0x7f5a8115ed87 [ 261.417184] RSP: 002b:7fff3a65a458 EFLAGS: 0246 ORIG_RAX: 0010 [ 261.427362] RAX: ffda RBX: 7fff3a65a700 RCX: 7f5a8115ed87 [ 261.432075] RDX: 7fff3a65a470 RSI: 2285 RDI: 0003 [ 261.436200] RBP: 7fff3a65a750 R08: 0010 R09: [ 261.446689] R10: R11: 0246 R12: 55b5481d9ce0 [ 261.450059] R13: R14: 55b5481d3550 R15: 00da [ 261.455103] Code: 48 c7 c0 f1 af e5 83 48 0f 44 c2 41 50 51 41 51 48 89 f9 49 89 f1 4d 89 d8 4c 89 d2 48 89 c6 48 c7 c7 48 b0 e5 83 e8 32 a7 e3 ff <0f> 0b 48 83 c4 18 c3 48 c7 c6 44 0d e5 83 49 89 f1 49 89 f3 eb [ 261.467988] ---[ end trace 75034b3832c364e4 ]--- === I can easily reproduce it with a qemu VM and 2 virtual SCSI disks by calling smartctl in a loop and doing some usual background I/O. The warning is triggered within 3 minutes or so (not instantly). Initially, it was produced on my server after a kernel update (because disks are monitored with smartctl via Zabbix). Looks like the thing was introduced with 0afe76e88c57d91ef5697720aed380a339e3df70. Any idea how to deal with this please? If needed, I can provide any additional info, and also I'm happy/ready to test any proposed patches. Thanks. Regards, Oleksandr
Re: [PATCH v2] Fix DID_OK handling in __scsi_error_from_host_byte()
On 2018-04-04 01:53 PM, Bart Van Assche wrote: Commit e39a97353e53 modified __scsi_error_from_host_byte() such that that function translates DID_OK into BLK_STS_OK. However, the description of that commit is wrong: it mentions that commit 2a842acab109 introduced a bug in __scsi_error_from_host_byte() although that commit did not change the behavior of that function. Additionally, commit e39a97353e53 introduced a severe bug: it causes commands that fail with hostbyte=DID_OK and driverbyte=DRIVER_SENSE to be completed with BLK_STS_OK. Fix __scsi_error_from_host_byte() by only translating good status values into BLK_STS_OK. Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()") Reported-by: Damien Le MoalSigned-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Douglas Gilbert Reviewed-by: Douglas Gilbert Cc: Damien Le Moal Cc: Christoph Hellwig Cc: sta...@vger.kernel.org --- There is some urgency here as the faulty patch is in lk 4.16.0 release. My patch was prepared independently of seeing this one. Both do the job (mine has a redundant status_byte(result) in the conditional). And the function name is now completely misleading but due to the urgency, that can wait for another day. Changes compared to v1: - Modified __scsi_error_from_host_byte() such that it again returns BLK_STS_OK for CONDITION MET and other result codes that represent success. drivers/scsi/scsi_lib.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 74a39db57d49..1496b34af409 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -736,7 +736,13 @@ static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd, { switch (host_byte(result)) { case DID_OK: - return BLK_STS_OK; + /* +* Also check the other bytes than the status byte in result +* to handle the case when a SCSI LLD sets result to +* DRIVER_SENSE << 24 without setting SAM_STAT_CHECK_CONDITION. +*/ + return scsi_status_is_good(result) && (result & ~0xff) == 0 ? + BLK_STS_OK : BLK_STS_IOERR; case DID_TRANSPORT_FAILFAST: return BLK_STS_TRANSPORT; case DID_TARGET_FAILURE:
Re: [PATCH] scsi: fix __scsi_error_from_host_byte() breakage
On Wed, 2018-04-04 at 14:14 -0400, Douglas Gilbert wrote: > Patch e39a97353e53 titled "scsi: core: return BLK_STS_OK for DID_OK in > __scsi_error_from_host_byte()" attempted to make that function return > BLK_STS_OK when it was given host_byte(result)==DID_OK. While that seems > sensible, it failed to take into account that there may be errors present > in the driver_byte and the status byte. Add those checks and expand > description of function accordingly. Hello Doug, Please clarify why you posted a new patch instead of reviewing "[PATCH v2] Fix DID_OK handling in __scsi_error_from_host_byte()". Did you perhaps miss that patch? Thanks, Bart.
[PATCH] scsi: fix __scsi_error_from_host_byte() breakage
Patch e39a97353e53 titled "scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()" attempted to make that function return BLK_STS_OK when it was given host_byte(result)==DID_OK. While that seems sensible, it failed to take into account that there may be errors present in the driver_byte and the status byte. Add those checks and expand description of function accordingly. Signed-off-by: Douglas Gilbert--- The function would be better called blk_status_from_scsi_result(). Is the leading "__" needed? drivers/scsi/scsi_lib.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index aaf485e36450..6a8bf312d82f 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -727,13 +727,18 @@ static bool scsi_end_request(struct request *req, blk_status_t error, * @cmd: SCSI command (unused) * @result:scsi error code * - * Translate SCSI error code into block errors. + * Translate SCSI error code into block errors. In the case of result==DID_OK + * also check driver and status bytes for errors. */ static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result) { switch (host_byte(result)) { case DID_OK: + if (driver_byte(result) != DRIVER_OK) + return BLK_STS_IOERR; + else if (status_byte(result) && !scsi_status_is_good(result)) + return BLK_STS_IOERR; /* e.g. reservation conflict */ return BLK_STS_OK; case DID_TRANSPORT_FAILFAST: return BLK_STS_TRANSPORT; -- 2.14.1
[PATCH v2] Fix DID_OK handling in __scsi_error_from_host_byte()
Commit e39a97353e53 modified __scsi_error_from_host_byte() such that that function translates DID_OK into BLK_STS_OK. However, the description of that commit is wrong: it mentions that commit 2a842acab109 introduced a bug in __scsi_error_from_host_byte() although that commit did not change the behavior of that function. Additionally, commit e39a97353e53 introduced a severe bug: it causes commands that fail with hostbyte=DID_OK and driverbyte=DRIVER_SENSE to be completed with BLK_STS_OK. Fix __scsi_error_from_host_byte() by only translating good status values into BLK_STS_OK. Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()") Reported-by: Damien Le MoalSigned-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Douglas Gilbert Cc: Damien Le Moal Cc: Christoph Hellwig Cc: sta...@vger.kernel.org --- Changes compared to v1: - Modified __scsi_error_from_host_byte() such that it again returns BLK_STS_OK for CONDITION MET and other result codes that represent success. drivers/scsi/scsi_lib.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 74a39db57d49..1496b34af409 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -736,7 +736,13 @@ static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd, { switch (host_byte(result)) { case DID_OK: - return BLK_STS_OK; + /* +* Also check the other bytes than the status byte in result +* to handle the case when a SCSI LLD sets result to +* DRIVER_SENSE << 24 without setting SAM_STAT_CHECK_CONDITION. +*/ + return scsi_status_is_good(result) && (result & ~0xff) == 0 ? + BLK_STS_OK : BLK_STS_IOERR; case DID_TRANSPORT_FAILFAST: return BLK_STS_TRANSPORT; case DID_TARGET_FAILURE: -- 2.16.2
Re: [PATCH] Revert "scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"
On 2018-04-04 12:30 PM, Bart Van Assche wrote: The description of commit e39a97353e53 is wrong: it mentions that commit 2a842acab109 introduced a bug in __scsi_error_from_host_byte() although that commit did not change the behavior of that function. Additionally, that commit introduced a severe bug: it causes commands that fail with hostbyte=DID_OK and driverbyte=DRIVER_SENSE to be completed with BLK_STS_OK. Although that commit claims to fix a bug it does not mention which bug it fixes. Hence revert that commit. That patch arose from the thread started by me titled: [PATCH v2] Make SCSI Status CONDITION MET equivalent to GOOD on 26 February this year. Hannes Reinecke made the case for reducing the helper scsi_io_completion_nz_result() to a single returned value (an idea that was applied and later reverted). He wrote: > Hmm. Can't we return blk_stat from this function, and adjusting the > 'result' value after it with an if-clause like > > if (blk_stat == BLK_STS_OK) >result = 0; > > That would cleanup up the function and avoid having (essentially) two > return values. > > The only problem here is that __scsi_error_from_hostbyte() will return > BLK_STS_IOERR if result == 0; doubt that is intended. > And I guess it'll affect this issue, too. > > Mind sending a separate patch for that? Actually Hannes had already sent a patch for that before I had time to react (on the same day). That is the patch you now plan to revert. In Hannes' defence (and I reviewed it) it is counter intuitive that for callers to work properly, this function supplied with DID_OK in the result argument should return BLK_STS_IOERR. Please find out why that is so and craft a comment so nobody else falls down this particular rabbit hole. Plus add 'case DID_OK:' before the 'default:' to emphasis the point. Doug Gilbert Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()") Reported-by: Damien Le MoalSigned-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Douglas Gilbert Cc: Damien Le Moal Cc: Christoph Hellwig Cc: sta...@vger.kernel.org --- drivers/scsi/scsi_lib.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 74a39db57d49..71f5b010684c 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -735,8 +735,6 @@ static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result) { switch (host_byte(result)) { - case DID_OK: - return BLK_STS_OK; case DID_TRANSPORT_FAILFAST: return BLK_STS_TRANSPORT; case DID_TARGET_FAILURE:
[PATCH] Revert "scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"
The description of commit e39a97353e53 is wrong: it mentions that commit 2a842acab109 introduced a bug in __scsi_error_from_host_byte() although that commit did not change the behavior of that function. Additionally, that commit introduced a severe bug: it causes commands that fail with hostbyte=DID_OK and driverbyte=DRIVER_SENSE to be completed with BLK_STS_OK. Although that commit claims to fix a bug it does not mention which bug it fixes. Hence revert that commit. Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()") Reported-by: Damien Le MoalSigned-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Douglas Gilbert Cc: Damien Le Moal Cc: Christoph Hellwig Cc: sta...@vger.kernel.org --- drivers/scsi/scsi_lib.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 74a39db57d49..71f5b010684c 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -735,8 +735,6 @@ static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result) { switch (host_byte(result)) { - case DID_OK: - return BLK_STS_OK; case DID_TRANSPORT_FAILFAST: return BLK_STS_TRANSPORT; case DID_TARGET_FAILURE: -- 2.16.2
[no subject]
пользователь веб-почты Обратите внимание, что 95% ваших писем, полученных после обновления сервера веб-почты в последнее время в нашей базе данных, были отложены. Регулярно получать и отправлять свои сообщения. Техническая команда нашей веб-почты обновит вашу учетную запись в течение 3 рабочих дней. Если вы этого не сделаете, ваша учетная запись будет временно приостановлена нашими службами. Чтобы повторно подтвердить свой почтовый ящик, пришлите следующую информацию: обычный: Имя пользователя: пароль: Подтвердите Пароль: Предупреждение!! Если это не позволит обновлять учетные записи в течение двух дней после получения электронной почты, они будут постоянно потерять учетные записи владельцев учетных записей веб-почты. Спасибо за ваше сотрудничество! Copyright 2017-2018 Служба технической поддержки WebMail, Inc.
Re: [PATCHv2] scsi: Fix failed request error code
On 2018-04-04 03:27 AM, Hannes Reinecke wrote: With the introduction of commit e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"), a command that failed with hostbyte=DID_OK and driverbyte=DRIVER_SENSE but lacking additional sense information will have a return code set to BLK_STS_OK. This results in the request issuer to see successful request execution despite the failure. An example of such case is an unaligned write on a host managed ZAC disk connected to a SAS HBA with a malfunctioning SAT. The unaligned write command gets aborted but has no additional sense information. sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE sd 10:0:0:0: [sde] tag#3905 Sense Key : Aborted Command [current] sd 10:0:0:0: [sde] tag#3905 Add. Sense: No additional sense information sd 10:0:0:0: [sde] tag#3905 CDB: Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 00 01 00 00 print_req_error: I/O error, dev sde, sector 274726920 In scsi_io_completion(), sense key handling to not change the request error code and success being reported to the issuer. Fix this by making sure that the error code always indicates an error if scsi_io_completion() decide that the action to be taken for a failed command is to not retry it and terminate it immediately (ACTION_FAIL) . Signed-off-by: Damien Le MoalSigned-off-by: Hannes Reinecke Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()") Cc: --- drivers/scsi/scsi_lib.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 393f9db8f41b..9389c41e2829 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -905,6 +905,12 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) goto requeue; error = __scsi_error_from_host_byte(cmd, result); + /* +* If the hostbyte was DID_OK, but the sense code is valid +* we always should set BLK_STS_IOERR. +*/ + if (error == BLK_STS_OK && sense_valid) && !sense_deferred Doug Gilbert + error = BLK_STS_IOERR; if (host_byte(result) == DID_RESET) { /* Third party bus reset or reset for error recovery
Re: [PATCHv2] scsi: Fix failed request error code
On Wed, 2018-04-04 at 09:27 +0200, Hannes Reinecke wrote: > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c > index 393f9db8f41b..9389c41e2829 100644 > --- a/drivers/scsi/scsi_lib.c > +++ b/drivers/scsi/scsi_lib.c > @@ -905,6 +905,12 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned > int good_bytes) > goto requeue; > > error = __scsi_error_from_host_byte(cmd, result); > + /* > + * If the hostbyte was DID_OK, but the sense code is valid > + * we always should set BLK_STS_IOERR. > + */ > + if (error == BLK_STS_OK && sense_valid) > + error = BLK_STS_IOERR; > > if (host_byte(result) == DID_RESET) { > /* Third party bus reset or reset for error recovery __scsi_error_from_host_byte() has two callers. Why does this patch only update one of these two callers? Regarding commit e39a97353e53, the description of that commit is as follows: "When converting __scsi_error_from_host_byte() to BLK_STS error codes the case DID_OK was forgotten, resulting in it always returning an error." However, the comment above that function reads as follows: "translate SCSI error code into errno". If I have a look at the v4.12 SCSI core (before the blkstatus_t conversion) then I see that __scsi_error_from_host_byte() never returns 0. That means that the description of commit e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()") is wrong. Does that mean that commit e39a97353e53 should be reverted? Thanks, Bart.
Re: [PATCH 2/2] sd_zbc: Avoid errors due to sd_zbc_setup() execution
On Wed, 2018-04-04 at 17:54 +0900, Damien Le Moal wrote: > Since SCSI scanning occurs asynchronously, since sd_revalidate_disk() > is called from sd_probe_async() and since sd_revalidate_disk() calls > sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called > concurrently with operations referencing a drive zone bitmaps and number Should "a" be changed into "the"? > [Damien] Updated commit message and changed nr_zones/bitmap swap order. Updating the number of zones after having updated the bitmap pointers is not sufficient to avoid trouble if the number of zones as reported by the drive changes while I/O is in progress. With the current implementation if the number of zones changes the seq_zones_bitmap is cleared. Can this cause trouble for the mq-deadline scheduler? Additionally, CPUs other than x86 can reorder store operations. Even worse, a CPU could cache the zone bitmap pointers which means that at least RCU protection + kfree_rcu() is needed to avoid trouble. I think we either should handle this case properly or issue a kernel warning. Thanks, Bart.
Re: [PATCH 1/2] sd_zbc: Avoid errors due to sd_zbc_check_zone_size() execution
On 04/04/18 01:54, Damien Le Moal wrote: static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) { + u64 sdkp_zone_blocks = sdkp->zone_blocks; Shouldn't this variable be initialized to zero such that zone size changes are accepted even if the SAME field in the REPORT ZONES response is zero? Thanks, Bart.
RE: [PATCH] aacraid: Insure command thread is not recursively stopped
> -Original Message- > From: Dave Carroll [mailto:david.carr...@microsemi.com] > Sent: Wednesday, April 4, 2018 3:21 AM > To: Martin K . Petersen; James Bottomley > > Cc: Dave Carroll ; linux-scsi s...@vger.kernel.org>; dl-esc-Aacraid Linux Driver > ; Scott Benesh > Subject: [PATCH] aacraid: Insure command thread is not recursively stopped > > If a recursive IOP_RESET is invoked, usually due to the eh_thread handling > errors after the first reset, be sure we flag that the command thread has > been stopped to avoid an Oops of the form; > > [ 336.620256] CPU: 28 PID: 1193 Comm: scsi_eh_0 Kdump: loaded Not > tainted 4.14.0-49.el7a.ppc64le #1 > [ 336.620297] task: c03fd630b800 task.stack: c03fd61a4000 > [ 336.620326] NIP: c0176794 LR: c013038c CTR: > c024bc10 > [ 336.620361] REGS: c03fd61a7720 TRAP: 0300 Not tainted (4.14.0- > 49.el7a.ppc64le) > [ 336.620395] MSR: 90009033 CR: > 22084022 XER: 2004 > [ 336.620435] CFAR: c0130388 DAR: DSISR: > 4000 SOFTE: 1 > [ 336.620435] GPR00: c013038c c03fd61a79a0 c14c7e00 > > [ 336.620435] GPR04: 000c 000c > 90009033 0477 > [ 336.620435] GPR08: 0477 > c00810f7d940 > [ 336.620435] GPR12: c024bc10 c7a33400 > c01708a8 c03fe3b881d8 > [ 336.620435] GPR16: c03fe3b88060 c03fd61a7d10 f000 > 001e > [ 336.620435] GPR20: 0001 c0ebf1a0 > 0001 c03fe3b88000 > [ 336.620435] GPR24: 0003 0002 > c03fe3b88840 c03fe3b887e8 > [ 336.620435] GPR28: c03fe3b88000 c03fc8181788 > c03fc8181700 > [ 336.620750] NIP [c0176794] exit_creds+0x34/0x160 > [ 336.620775] LR [c013038c] __put_task_struct+0x8c/0x1f0 > [ 336.620804] Call Trace: > [ 336.620817] [c03fd61a79a0] [c03fe3b88000] 0xc03fe3b88000 > (unreliable) > [ 336.620853] [c03fd61a79d0] [c013038c] > __put_task_struct+0x8c/0x1f0 > [ 336.620889] [c03fd61a7a00] [c0171418] > kthread_stop+0x1e8/0x1f0 > [ 336.620922] [c03fd61a7a40] [c00810f7448c] > aac_reset_adapter+0x14c/0x8d0 [aacraid] > [ 336.620959] [c03fd61a7b00] [c00810f60174] > aac_eh_host_reset+0x84/0x100 [aacraid] > [ 336.621010] [c03fd61a7b30] [c0864f24] > scsi_try_host_reset+0x74/0x180 > [ 336.621046] [c03fd61a7bb0] [c0867ac0] > scsi_eh_ready_devs+0xc00/0x14d0 > [ 336.625165] [c03fd61a7ca0] [c08699e0] > scsi_error_handler+0x550/0x730 > [ 336.632101] [c03fd61a7dc0] [c0170a08] kthread+0x168/0x1b0 > [ 336.639031] [c03fd61a7e30] [c000b528] > ret_from_kernel_thread+0x5c/0xb4 > [ 336.645971] Instruction dump: > [ 336.648743] 384216a0 7c0802a6 fbe1fff8 f8010010 f821ffd1 7c7f1b78 > 6000 6000 > [ 336.657056] 3940 e87f0838 f95f0838 7c0004ac <7d401828> 314a > 7d40192d 40c2fff4 > [ 336.663997] -[ end trace 4640cf8d4945ad95 ]- > > So flag when the thread is stopped by setting the thread pointer to NULL. > > Signed-off-by: Dave Carroll Reviewed-by: Raghava Aditya Renukunta
Re: [PATCH 03/15] mpt3sas: Add sanity checks for scsi tracker before accessing it.
On Tue, Apr 3, 2018 at 9:26 PM, Bart Van Asschewrote: > On Tue, 2018-04-03 at 10:11 +0530, Sreekanth Reddy wrote: >> [SR] No, driver calls _scsih_flush_running_cmds during Host reset time >> and during host reset time driver will set 'ioc->shost_recovery' flag. >> So in the scsih_qcmd() driver will return the incoming SCSI cmds with >> "SCSI_MLQUEUE_HOST_BUSY" whenever 'ioc->shost_recovery' flag is set as >> shown below, >> >> /* host recovery or link resets sent via IOCTLs */ >> if (ioc->shost_recovery || ioc->ioc_link_reset_in_progress) >> return SCSI_MLQUEUE_HOST_BUSY; > > The ioc->shost_recovery flag is involved in at least the following race: > * From one context a SCSI command is submitted and scsih_qcmd() gets called. > * At the same time sg_reset is invoked from a shell and triggers a call to > scsih_host_reset(). That function in turn will call > mpt3sas_base_hard_reset_handler(). > > I think this scenario can cause ioc->shost_recovery to be set by the mpt3sas > driver after it has been checked by the scsih_qcmd() function. Then these outstanding commands will be get flush by the driver in _scsih_flush_running_cmds() function which we call as a part of host reset operation. > > Anyway, let's get back to patch 03/15 at the start of this e-mail thread. > That patch looks to me like an incomplete attempt to work around a race > condition in the mpt3sas driver. I don't expect that anyone will trust that > patch without further explanation. Which race condition does that patch > address? And why do the mpt3sas maintainers believe that this patch is > sufficient to address that race condition? Sure Bart, we will add proper description with the information which I explained in this mail thread on how this patch will fix below issue, During Host reset operation time driver will flush out all the outstanding IO's to the SML in below function, static void _scsih_flush_running_cmds(struct MPT3SAS_ADAPTER *ioc) { struct scsi_cmnd *scmd; struct scsiio_tracker *st; u16 smid; int count = 0; [SR] Here driver is looping starting from smid one to HBA queue depth. for (smid = 1; smid <= ioc->scsiio_depth; smid++) { [SR] Some times it is possible that scsi_cmnd might have created at SML but it might not be issued to the driver as host reset operation is going on, So here we will get non-NULL scmd. scmd = mpt3sas_scsih_scsi_lookup_get(ioc, smid); if (!scmd) continue; count++; _scsih_set_satl_pending(scmd, false); [SR] Here we are trying to get the scsi tracker 'st' for the above scmd (which is not received by the driver) and so fields of this 'st' are uninitialized. st = scsi_cmd_priv(scmd); [SR] And here we are trying to clear the scsi tracker 'st' which is not yet all initialized by the driver, in other terms we are trying to flush out the scmd command which is not yet all received by the driver. mpt3sas_base_clear_st(ioc, st); scsi_dma_unmap(scmd); if (ioc->pci_error_recovery || ioc->remove_host) scmd->result = DID_NO_CONNECT << 16; else scmd->result = DID_RESET << 16; scmd->scsi_done(scmd); } dtmprintk(ioc, pr_info(MPT3SAS_FMT "completing %d cmds\n", ioc->name, count)); } > > Thanks, > > Bart. >
Re: [PATCH 1/1] scsi: mvsas:fix memory leak
Hi Xidong, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on mkp-scsi/for-next] [also build test WARNING on v4.16 next-20180403] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Xidong-Wang/scsi-mvsas-fix-memory-leak/20180404-182132 base: https://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git for-next config: x86_64-randconfig-x010-201813 (attached as .config) compiler: gcc-7 (Debian 7.3.0-1) 7.3.0 reproduce: # save the attached .config to linux build tree make ARCH=x86_64 All warnings (new ones prefixed by >>): drivers/scsi//mvsas/mv_init.c: In function 'mvs_pci_alloc': >> drivers/scsi//mvsas/mv_init.c:373:2: warning: this 'if' clause does not >> guard... [-Wmisleading-indentation] if (!mvi) ^~ drivers/scsi//mvsas/mv_init.c:375:3: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the 'if' return NULL; ^~ Cyclomatic Complexity 1 arch/x86/include/asm/bitops.h:test_and_set_bit Cyclomatic Complexity 1 arch/x86/include/asm/bitops.h:fls64 Cyclomatic Complexity 1 include/linux/log2.h:__ilog2_u64 Cyclomatic Complexity 1 include/linux/mem_encrypt.h:sme_get_me_mask Cyclomatic Complexity 1 include/asm-generic/getorder.h:__get_order Cyclomatic Complexity 1 arch/x86/include/asm/paravirt.h:arch_local_save_flags Cyclomatic Complexity 1 arch/x86/include/asm/irqflags.h:arch_irqs_disabled_flags Cyclomatic Complexity 1 include/linux/device.h:dev_get_drvdata Cyclomatic Complexity 1 include/linux/device.h:dev_set_drvdata Cyclomatic Complexity 1 include/linux/dma-debug.h:debug_dma_free_coherent Cyclomatic Complexity 1 arch/x86/include/asm/dma-mapping.h:get_arch_dma_ops Cyclomatic Complexity 4 include/linux/dma-mapping.h:get_dma_ops Cyclomatic Complexity 1 include/linux/kasan.h:kasan_kmalloc Cyclomatic Complexity 28 include/linux/slab.h:kmalloc_index Cyclomatic Complexity 1 include/linux/slab.h:kmem_cache_alloc_trace Cyclomatic Complexity 1 include/linux/slab.h:kmalloc_order_trace Cyclomatic Complexity 68 include/linux/slab.h:kmalloc_large Cyclomatic Complexity 5 include/linux/slab.h:kmalloc Cyclomatic Complexity 1 include/linux/slab.h:kzalloc Cyclomatic Complexity 1 include/linux/pci.h:pci_get_drvdata Cyclomatic Complexity 1 include/linux/pci.h:pci_set_drvdata Cyclomatic Complexity 5 drivers/scsi//mvsas/mv_init.c:mvs_post_sas_ha_init Cyclomatic Complexity 8 drivers/scsi//mvsas/mv_init.c:mvs_store_interrupt_coalescing Cyclomatic Complexity 1 drivers/scsi//mvsas/mv_init.c:mvs_show_interrupt_coalescing Cyclomatic Complexity 1 drivers/scsi//mvsas/mv_init.c:mvs_show_driver_version Cyclomatic Complexity 1 arch/x86/include/asm/io.h:ioremap Cyclomatic Complexity 1 drivers/scsi//mvsas/mv_init.c:mvs_exit Cyclomatic Complexity 2 drivers/scsi//mvsas/mv_init.c:mvs_pci_alloc Cyclomatic Complexity 4 include/linux/dma-mapping.h:dma_free_attrs Cyclomatic Complexity 1 include/linux/dma-mapping.h:dma_free_coherent Cyclomatic Complexity 13 drivers/scsi//mvsas/mv_init.c:mvs_free Cyclomatic Complexity 2 drivers/scsi//mvsas/mv_init.c:mvs_pci_remove Cyclomatic Complexity 3 drivers/scsi//mvsas/mv_init.c:mvs_tasklet Cyclomatic Complexity 2 include/linux/interrupt.h:tasklet_schedule Cyclomatic Complexity 3 drivers/scsi//mvsas/mv_init.c:mvs_interrupt Cyclomatic Complexity 4 include/linux/dma-mapping.h:dma_supported Cyclomatic Complexity 3 include/linux/dma-mapping.h:dma_check_mask Cyclomatic Complexity 3 include/linux/dma-mapping.h:dma_set_mask Cyclomatic Complexity 1 include/linux/pci-dma-compat.h:pci_set_dma_mask Cyclomatic Complexity 2 include/linux/dma-mapping.h:dma_set_coherent_mask Cyclomatic Complexity 1 include/linux/pci-dma-compat.h:pci_set_consistent_dma_mask Cyclomatic Complexity 6 drivers/scsi//mvsas/mv_init.c:pci_go_64 Cyclomatic Complexity 5 include/linux/slab.h:kmalloc_array Cyclomatic Complexity 1 include/linux/slab.h:kcalloc Cyclomatic Complexity 3 drivers/scsi//mvsas/mv_init.c:mvs_prep_sas_ha_init Cyclomatic Complexity 2 drivers/scsi//mvsas/mv_init.c:mvs_init_sas_add Cyclomatic Complexity 1 include/scsi/scsi_host.h:scsi_add_host Cyclomatic Complexity 1 include/linux/interrupt.h:request_irq Cyclomatic Complexity 13 drivers/scsi//mvsas/mv_init.c:mvs_pci_init Cyclomatic Complexity 3 drivers/scsi//mvsas/mv_init.c:mvs_init Cyclomatic Complexity 13 drivers/scsi//mvsas/mv_init.c:mvs_ioremap Cyclomatic Complexity 1 drivers/scsi//mvsas/mv_init.c:mvs_iounmap vim +/if +373 drivers/scsi//mvsas/mv_init.c 20b09c29 Andy Yan 2009-05-08 362 6f039790 Greg Kroah-Hartman 2012-12-21 363 static struct mvs_info *mvs_pci_alloc(struct pci_dev *pdev, 20b09c29 Andy Yan 2009-05-08 364 const struct
Re: [PATCH 1/1] scsi: mvsas:fix memory leak
Hi Xidong, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on mkp-scsi/for-next] [also build test WARNING on v4.16 next-20180403] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Xidong-Wang/scsi-mvsas-fix-memory-leak/20180404-182132 base: https://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git for-next config: i386-randconfig-x016-201813 (attached as .config) compiler: gcc-7 (Debian 7.3.0-1) 7.3.0 reproduce: # save the attached .config to linux build tree make ARCH=i386 All warnings (new ones prefixed by >>): In file included from include/linux/kernel.h:10:0, from drivers/scsi/mvsas/mv_sas.h:29, from drivers/scsi/mvsas/mv_init.c:27: drivers/scsi/mvsas/mv_init.c: In function 'mvs_pci_alloc': include/linux/compiler.h:58:2: warning: this 'if' clause does not guard... [-Wmisleading-indentation] if (__builtin_constant_p(!!(cond)) ? !!(cond) : \ ^ include/linux/compiler.h:56:23: note: in expansion of macro '__trace_if' #define if(cond, ...) __trace_if( (cond , ## __VA_ARGS__) ) ^~ >> drivers/scsi/mvsas/mv_init.c:373:2: note: in expansion of macro 'if' if (!mvi) ^~ drivers/scsi/mvsas/mv_init.c:375:3: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the 'if' return NULL; ^~ vim +/if +373 drivers/scsi/mvsas/mv_init.c 20b09c29 Andy Yan 2009-05-08 362 6f039790 Greg Kroah-Hartman 2012-12-21 363 static struct mvs_info *mvs_pci_alloc(struct pci_dev *pdev, 20b09c29 Andy Yan 2009-05-08 364 const struct pci_device_id *ent, 20b09c29 Andy Yan 2009-05-08 365 struct Scsi_Host *shost, unsigned int id) 20b09c29 Andy Yan 2009-05-08 366 { 84fbd0ce Xiangliang Yu 2011-05-24 367 struct mvs_info *mvi = NULL; 20b09c29 Andy Yan 2009-05-08 368 struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); 20b09c29 Andy Yan 2009-05-08 369 b89e8f53 Xiangliang Yu 2011-05-24 370 mvi = kzalloc(sizeof(*mvi) + b89e8f53 Xiangliang Yu 2011-05-24 371 (1L << mvs_chips[ent->driver_data].slot_width) * b89e8f53 Xiangliang Yu 2011-05-24 372 sizeof(struct mvs_slot_info), GFP_KERNEL); 20b09c29 Andy Yan 2009-05-08 @373 if (!mvi) 549079d1 Xidong Wang2018-04-04 374 scsi_host_put(shost); 20b09c29 Andy Yan 2009-05-08 375 return NULL; dd4969a8 Jeff Garzik2009-05-08 376 20b09c29 Andy Yan 2009-05-08 377 mvi->pdev = pdev; 20b09c29 Andy Yan 2009-05-08 378 mvi->dev = >dev; 20b09c29 Andy Yan 2009-05-08 379 mvi->chip_id = ent->driver_data; 20b09c29 Andy Yan 2009-05-08 380 mvi->chip = _chips[mvi->chip_id]; 20b09c29 Andy Yan 2009-05-08 381 INIT_LIST_HEAD(>wq_list); 20b09c29 Andy Yan 2009-05-08 382 20b09c29 Andy Yan 2009-05-08 383 ((struct mvs_prv_info *)sha->lldd_ha)->mvi[id] = mvi; 20b09c29 Andy Yan 2009-05-08 384 ((struct mvs_prv_info *)sha->lldd_ha)->n_phy = mvi->chip->n_phy; 20b09c29 Andy Yan 2009-05-08 385 20b09c29 Andy Yan 2009-05-08 386 mvi->id = id; 20b09c29 Andy Yan 2009-05-08 387 mvi->sas = sha; 20b09c29 Andy Yan 2009-05-08 388 mvi->shost = shost; 20b09c29 Andy Yan 2009-05-08 389 b89e8f53 Xiangliang Yu 2011-05-24 390 mvi->tags = kzalloc(MVS_CHIP_SLOT_SZ>>3, GFP_KERNEL); b89e8f53 Xiangliang Yu 2011-05-24 391 if (!mvi->tags) b89e8f53 Xiangliang Yu 2011-05-24 392 goto err_out; b89e8f53 Xiangliang Yu 2011-05-24 393 20b09c29 Andy Yan 2009-05-08 394 if (MVS_CHIP_DISP->chip_ioremap(mvi)) 20b09c29 Andy Yan 2009-05-08 395 goto err_out; 20b09c29 Andy Yan 2009-05-08 396 if (!mvs_alloc(mvi, shost)) 20b09c29 Andy Yan 2009-05-08 397 return mvi; dd4969a8 Jeff Garzik2009-05-08 398 err_out: dd4969a8 Jeff Garzik2009-05-08 399 mvs_free(mvi); dd4969a8 Jeff Garzik2009-05-08 400 return NULL; dd4969a8 Jeff Garzik2009-05-08 401 } dd4969a8 Jeff Garzik2009-05-08 402 :: The code at line 373 was first introduced by commit :: 20b09c2992fefbe78f8cede7b404fb143a413c52 [SCSI] mvsas: add support for 94xx; layout change; bug fixes :: TO: Andy Yan <a...@marvell.com> :: CC: James Bottomley <james.bottom...@hansenpartnership.com> --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: application/gzip
[PATCH 2/2] sd_zbc: Avoid errors due to sd_zbc_setup() execution
From: Bart Van AsscheSince SCSI scanning occurs asynchronously, since sd_revalidate_disk() is called from sd_probe_async() and since sd_revalidate_disk() calls sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called concurrently with operations referencing a drive zone bitmaps and number of zones. Make sure that this race does not cause failures when revalidate does not detect any change by making the following changes to sd_zbc_setup(): - Ensure that sd_zbc_setup_seq_zones_bitmap() does not change any ZBC metadata in the request queue. - Only modify the ZBC information in the request queue that has changed. If the number of zones has changed, update q->nr_zones, q->seq_zones_wlock and q->seq_zones_bitmap. If the type of some zones has changed but not the number of zones, only update the zone type information. Signed-off-by: Bart Van Assche [Damien] Updated commit message and changed nr_zones/bitmap swap order. Signed-off-by: Damien Le Moal Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: sta...@vger.kernel.org --- drivers/scsi/sd_zbc.c | 45 +++-- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index b59454ed5087..39ddbe92769c 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -551,14 +551,13 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, } /** - * sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap. + * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap. * @sdkp: target disk * * Allocate a zone bitmap and initialize it by identifying sequential zones. */ -static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) +static unsigned long *sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) { - struct request_queue *q = sdkp->disk->queue; unsigned long *seq_zones_bitmap; sector_t lba = 0; unsigned char *buf; @@ -566,7 +565,7 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp); if (!seq_zones_bitmap) - return -ENOMEM; + return ERR_PTR(-ENOMEM); buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); if (!buf) @@ -589,12 +588,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) kfree(buf); if (ret) { kfree(seq_zones_bitmap); - return ret; + return ERR_PTR(ret); } - - q->seq_zones_bitmap = seq_zones_bitmap; - - return 0; + return seq_zones_bitmap; } static void sd_zbc_cleanup(struct scsi_disk *sdkp) @@ -630,24 +626,37 @@ static int sd_zbc_setup(struct scsi_disk *sdkp) * of zones changed. */ if (sdkp->nr_zones != q->nr_zones) { + struct request_queue *q = sdkp->disk->queue; + unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL; + size_t zone_bitmap_size; - sd_zbc_cleanup(sdkp); - - q->nr_zones = sdkp->nr_zones; if (sdkp->nr_zones) { - q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp); - if (!q->seq_zones_wlock) { + seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp); + if (!seq_zones_wlock) { ret = -ENOMEM; goto err; } - ret = sd_zbc_setup_seq_zones_bitmap(sdkp); - if (ret) { - sd_zbc_cleanup(sdkp); + seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp); + if (IS_ERR(seq_zones_bitmap)) { + ret = PTR_ERR(seq_zones_bitmap); + kfree(seq_zones_wlock); goto err; } } - + zone_bitmap_size = BITS_TO_LONGS(sdkp->nr_zones) * + sizeof(unsigned long); + if (q->nr_zones != sdkp->nr_zones) { + swap(q->seq_zones_wlock, seq_zones_wlock); + swap(q->seq_zones_bitmap, seq_zones_bitmap); + q->nr_zones = sdkp->nr_zones; + } else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap, + zone_bitmap_size) != 0) { + memcpy(q->seq_zones_bitmap, seq_zones_bitmap, + zone_bitmap_size); + } + kfree(seq_zones_wlock); + kfree(seq_zones_bitmap); } return 0; -- 2.14.3
[PATCH 0/2] Fix errors due to revalidation of ZBC disks
The concurrent submission of commands such as a zone reset with the execution of sd_zbc_read_zones() from sd_revalidate() context can cause the command submissions to fail due to possible references to temporarily invalid values such as the number of zones or the disk zone size. This two ptches series introduces fix these problems by avoiding any change to the disk information unless a change is detected by revalidate. Bart Van Assche (1): sd_zbc: Avoid errors due to sd_zbc_setup() execution Damien Le Moal (1): sd_zbc: Avoid errors due to sd_zbc_check_zone_size() execution drivers/scsi/sd_zbc.c | 58 +-- 1 file changed, 33 insertions(+), 25 deletions(-) -- 2.14.3
[PATCH 1/2] sd_zbc: Avoid errors due to sd_zbc_check_zone_size() execution
When sd_revalidate() is executed for a ZBC disk (zoned block device), sd_zbc_read_zones() is called to revalidate the disk zone configuration. This executes sd_zbc_check_zone_size() to check that the disk zone sizes are in line with the defined constraints (all zones must be the same size and a power of 2 number of LBAs). As part of its execution, sd_zbc_check_zone_size() was temporarily setting sdkp->zone_blocks to 0. If during the execution of sd_zbc_check_zone_size() within sd_revalidate() context, another context issues a command which references sdkp->zone_blocks to check zone alignment of the command (e.g. a zone reset is issued and sd_zbc_setup_reset_cmnd() is called), an invalid value for the disk zone size is used and the alignment check fails. Simply fix this by using an on-stack variable inside sd_zbc_check_zone_size() instead of directly using sdkp->zone_blocks. This change is valid for both revalidate as well as for the first scan of the device. Signed-off-by: Damien Le MoalCc: --- drivers/scsi/sd_zbc.c | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 89cf4498f535..b59454ed5087 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -403,6 +403,7 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf) */ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) { + u64 sdkp_zone_blocks = sdkp->zone_blocks; u64 zone_blocks = 0; sector_t block = 0; unsigned char *buf; @@ -412,8 +413,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) int ret; u8 same; - sdkp->zone_blocks = 0; - /* Get a buffer */ buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); if (!buf) @@ -446,11 +445,11 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) /* Parse zone descriptors */ while (rec < buf + buf_len) { zone_blocks = get_unaligned_be64([8]); - if (sdkp->zone_blocks == 0) { - sdkp->zone_blocks = zone_blocks; - } else if (zone_blocks != sdkp->zone_blocks && + if (sdkp_zone_blocks == 0) { + sdkp_zone_blocks = zone_blocks; + } else if (zone_blocks != sdkp_zone_blocks && (block + zone_blocks < sdkp->capacity - || zone_blocks > sdkp->zone_blocks)) { + || zone_blocks > sdkp_zone_blocks)) { zone_blocks = 0; goto out; } @@ -467,7 +466,7 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) } while (block < sdkp->capacity); - zone_blocks = sdkp->zone_blocks; + zone_blocks = sdkp_zone_blocks; out: if (!zone_blocks) { -- 2.14.3
Re: [PATCH v5] scsi: new zorro_esp.c for Amiga Zorro NCR53C9x boards
Hi Christoph, thank you for taking the time to review and comment - my responses inline below. Am 03.04.2018 um 19:45 schrieb Christoph Hellwig: > Just a few style comments: > >> +static int zorro_esp_irq_pending(struct esp *); >> +static int cyber_esp_irq_pending(struct esp *); >> +static int fastlane_esp_irq_pending(struct esp *); > > Please avoid forward declarations wherever possible. > >> +struct zorro_driver_data { >> +const char *name; >> +unsigned long offset; >> +unsigned long dma_offset; >> +int absolute; /* offset is absolute address */ >> +int scsi_option; >> +int (*irq_pending)(struct esp *esp); >> +void (*dma_invalidate)(struct esp *esp); >> +void (*send_dma_cmd)(struct esp *esp, u32 dma_addr, u32 esp_count, >> + u32 dma_count, int write, u8 cmd); >> +}; > > Please use different esp_driver_ops instances for the different board > types. > >> +static const struct zorro_driver_data cyberstormI_data = { >> +.name = "CyberStormI", >> +.offset = 0xf400, >> +.dma_offset = 0xf800, >> +.absolute = 0, >> +.scsi_option = 0, > > You can remove all the xero initializations on static data. > Also please align the = signs with tabs in struct declarations. Will do all that. >> +static dma_addr_t zorro_esp_map_single(struct esp *esp, void *buf, >> + size_t sz, int dir) >> +{ >> +return dma_map_single(esp->dev, buf, sz, dir); >> +} >> + >> +static int zorro_esp_map_sg(struct esp *esp, struct scatterlist *sg, >> + int num_sg, int dir) >> +{ >> +return dma_map_sg(esp->dev, sg, num_sg, dir); >> +} >> + >> +static void zorro_esp_unmap_single(struct esp *esp, dma_addr_t addr, >> + size_t sz, int dir) >> +{ >> +dma_unmap_single(esp->dev, addr, sz, dir); >> +} >> + >> +static void zorro_esp_unmap_sg(struct esp *esp, struct scatterlist *sg, >> + int num_sg, int dir) >> +{ >> +dma_unmap_sg(esp->dev, sg, num_sg, dir); >> +} > > These wrappers seem rather pointless. They are nonetheless necessary, for two reasons as far as I can see: ESP driver DMA ops hooks as defined in esp_scsi.h use struct esp * as first parameter, not struct dev *. Changing that would require changing all ESP based drivers (sun_esp.c, jazz_esp.c, am53c974.c, sun3_esp.c, mac_esp.c) and the driver core. I'd like to hear Dave's view on that. The generic dma_map_* and dma_unmap_* ops are defined as inline functions in dma-mapping.h. There may be a good reason for that choice. You probably know better than me what the reason for inlining was. All other ESP drivers use the same wrapper mechanism, and most of them don't do more than dereference the struct device pointer and pass that and the rest of parameters on to the generic or PCI API hooks. I see no reason to deviate from that convention. >> +/* PIO macros as used in mac_esp.c. >> + * Note that addr and fifo arguments are local-scope variables declared >> + * in zorro_esp_send_pio_cmd(), the macros are only used in that function, >> + * and addr and fifo are referenced in each use of the macros so there >> + * is no need to pass them as macro parameters. >> + */ > > Please use normal Linux comment style: > > /* > * Blah, blah, blah. > */ Thanks, done. >> +#define ZORRO_ESP_PIO_LOOP(operands, reg1) \ >> +{ \ >> +asm volatile ( \ >> + "1: moveb " operands "\n" \ >> + " subqw #1,%1 \n" \ >> + " jbne 1b \n" \ >> + : "+a" (addr), "+r" (reg1) \ >> + : "a" (fifo)); \ >> +} > > What is the point of the curly braces around the asm statement? My overeager attempt at shutting up checkpatch ('need braces around complex macro'). I'll revert to what Finn used in mac_esp.c (which is where this code was taken from). That did pass review at the time, hope this counts for something. >> +static void zorro_esp_send_pio_cmd(struct esp *esp, u32 addr, u32 esp_count, >> + u32 dma_count, int write, u8 cmd) >> +{ >> +struct zorro_esp_priv *zep = dev_get_drvdata(esp->dev); >> +u8 __iomem *fifo = esp->regs + ESP_FDATA * 16; >> +u8 phase = esp->sreg & ESP_STAT_PMASK; >> + >> +cmd &= ~ESP_CMD_DMA; >> + >> +if (write) { > > It seems like this routine would benefit from being split into a read > and a write one. The ESP driver uses just a single send_dma_cmd() function, so we'd have to conditionalize which of the two PIO functions to call from within each board specific send_dma_cmd(). IMO that makes the code harder to follow. (I wouldn't go as far as Finn and add another layer of indirection, but the impact is much the same.) Finn also provided another justification for using a single routine for read and write - we plan to share that code between mac_esp.c and zorro_esp.c so I'd rather keep it as close to the mac_esp.c version as possible. >> +// Blizzard 1230/60
Re: [PATCH] scsi: Fix failed request error code
On 4/4/18 16:39, Damien Le Moal wrote: > Hannes, > > On 4/4/18 16:35, Hannes Reinecke wrote: >> On Wed, 4 Apr 2018 07:06:58 + >> Damien Le Moalwrote: >> >>> Hannes, >>> >>> On 4/4/18 15:57, Hannes Reinecke wrote: On Wed, 4 Apr 2018 15:51:38 +0900 Damien Le Moal wrote: > With the introduction of commit e39a97353e53 ("scsi: core: return > BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"), a command > that failed with hostbyte=DID_OK and driverbyte=DRIVER_SENSE but > lacking additional sense information will have a return code set to > BLK_STS_OK. This results in the request issuer to see successful > request execution despite the failure. An example of such case is > an unaligned write on a host managed ZAC disk connected to a SAS > HBA with a malfunctioning SAT. The unaligned write command gets > aborted but has no additional sense information. > > sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK > driverbyte=DRIVER_SENSE sd 10:0:0:0: [sde] tag#3905 Sense Key : > Aborted Command [current] sd 10:0:0:0: [sde] tag#3905 Add. Sense: > No additional sense information sd 10:0:0:0: [sde] tag#3905 CDB: > Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 00 01 00 00 > print_req_error: I/O error, dev sde, sector 274726920 > > In scsi_io_completion(), sense key handling to not change the > request error code and success being reported to the issuer. > > Fix this by making sure that the error code always indicates an > error if scsi_io_completion() decide that the action to be taken > for a failed command is to not retry it and terminate it > immediately (ACTION_FAIL) . > > Signed-off-by: Damien Le Moal > Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in > __scsi_error_from_host_byte()") Cc: Hannes Reinecke > Cc: > --- > drivers/scsi/scsi_lib.c | 9 + > 1 file changed, 9 insertions(+) > > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c > index c84f931388f2..87579bfcc186 100644 > --- a/drivers/scsi/scsi_lib.c > +++ b/drivers/scsi/scsi_lib.c > @@ -1002,6 +1002,15 @@ void scsi_io_completion(struct scsi_cmnd > *cmd, unsigned int good_bytes) scsi_print_command(cmd); > } > } > + /* > + * The command failed and should not be retried. > If the host > + * byte is DID_OK, then > __scsi_error_from_host_byte() returned > + * BLK_STS_OK and error indicates a success. Make > sure to not > + * use that as the completion code and always > return an > + * I/O error. > + */ > + if (error == BLK_STS_OK) > + error = BLK_STS_IOERR; > if (!scsi_end_request(req, error, > blk_rq_err_bytes(req), 0)) return; > /*FALLTHRU*/ That looks wrong. Shouldn't __scsi_error_from_host_byte() return the correct status here? >>> >>> My drive said: >>> >>> sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK >>> driverbyte=DRIVER_SENSE >>> sd 10:0:0:0: [sde] tag#3905 Sense Key : Aborted Command [current] >>> sd 10:0:0:0: [sde] tag#3905 Add. Sense: No additional sense >>> information sd 10:0:0:0: [sde] tag#3905 CDB: Write(16) 8a 00 00 00 00 >>> 00 02 0c 00 01 00 00 00 01 00 00 >>> >>> Since hostbyte is DID_OK, __scsi_error_from_host_byte() returns >>> BLK_STS_OK. The HBA fails to give sense data, so the ABORTED_COMMAND >>> case in scsi_io_completion() "switch (sshdr.sense_key)" does nothing >>> and error stays equal to success. scsi_end_request() gets called with >>> that and dd sees a success... >>> >>> There are also plenty of other sense keys cases where error is not >>> changed despite the fact that error can be BLK_STS_SUCCESS (in fact, I >>> think this is likely the most common case since an command failure >>> with hostbyte=DID_OK and driverbyte=DRIVER_SENSE is probably the most >>> common one. >>> >>> My patch is a bit of a hammer and makes sure that an ACTION_FAIL >>> request is completed as a failure... Am I getting all this wrong ? >>> >> >> Just checked with the code, and send a new patch. >> Which I find a bit clearer. >> >> Fact is that __scsi_error_from_host_byte() is not sufficient to >> evaluate the return code, so taking its return value with any further >> checks is indeed wrong. >> But then it never said so on the boilerplate of >> __scsi_error_from_host_byte() ... >> >> So please do check the 'v2' version of the patch. >> > > It fixes the particular problem I am seeing. > But looking more at this code, isn't there a problem with > sshdr.sense_key == RECOVERED_ERROR ? > > if (sense_valid && (sshdr.sense_key == RECOVERED_ERROR)) { > ... > result = 0; >
Re: [PATCH] scsi: Fix failed request error code
Hannes, On 4/4/18 16:35, Hannes Reinecke wrote: > On Wed, 4 Apr 2018 07:06:58 + > Damien Le Moalwrote: > >> Hannes, >> >> On 4/4/18 15:57, Hannes Reinecke wrote: >>> On Wed, 4 Apr 2018 15:51:38 +0900 >>> Damien Le Moal wrote: >>> With the introduction of commit e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"), a command that failed with hostbyte=DID_OK and driverbyte=DRIVER_SENSE but lacking additional sense information will have a return code set to BLK_STS_OK. This results in the request issuer to see successful request execution despite the failure. An example of such case is an unaligned write on a host managed ZAC disk connected to a SAS HBA with a malfunctioning SAT. The unaligned write command gets aborted but has no additional sense information. sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE sd 10:0:0:0: [sde] tag#3905 Sense Key : Aborted Command [current] sd 10:0:0:0: [sde] tag#3905 Add. Sense: No additional sense information sd 10:0:0:0: [sde] tag#3905 CDB: Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 00 01 00 00 print_req_error: I/O error, dev sde, sector 274726920 In scsi_io_completion(), sense key handling to not change the request error code and success being reported to the issuer. Fix this by making sure that the error code always indicates an error if scsi_io_completion() decide that the action to be taken for a failed command is to not retry it and terminate it immediately (ACTION_FAIL) . Signed-off-by: Damien Le Moal Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()") Cc: Hannes Reinecke Cc: --- drivers/scsi/scsi_lib.c | 9 + 1 file changed, 9 insertions(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index c84f931388f2..87579bfcc186 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1002,6 +1002,15 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) scsi_print_command(cmd); } } + /* + * The command failed and should not be retried. If the host + * byte is DID_OK, then __scsi_error_from_host_byte() returned + * BLK_STS_OK and error indicates a success. Make sure to not + * use that as the completion code and always return an + * I/O error. + */ + if (error == BLK_STS_OK) + error = BLK_STS_IOERR; if (!scsi_end_request(req, error, blk_rq_err_bytes(req), 0)) return; /*FALLTHRU*/ >>> >>> That looks wrong. >>> Shouldn't __scsi_error_from_host_byte() return the correct status >>> here? >> >> My drive said: >> >> sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK >> driverbyte=DRIVER_SENSE >> sd 10:0:0:0: [sde] tag#3905 Sense Key : Aborted Command [current] >> sd 10:0:0:0: [sde] tag#3905 Add. Sense: No additional sense >> information sd 10:0:0:0: [sde] tag#3905 CDB: Write(16) 8a 00 00 00 00 >> 00 02 0c 00 01 00 00 00 01 00 00 >> >> Since hostbyte is DID_OK, __scsi_error_from_host_byte() returns >> BLK_STS_OK. The HBA fails to give sense data, so the ABORTED_COMMAND >> case in scsi_io_completion() "switch (sshdr.sense_key)" does nothing >> and error stays equal to success. scsi_end_request() gets called with >> that and dd sees a success... >> >> There are also plenty of other sense keys cases where error is not >> changed despite the fact that error can be BLK_STS_SUCCESS (in fact, I >> think this is likely the most common case since an command failure >> with hostbyte=DID_OK and driverbyte=DRIVER_SENSE is probably the most >> common one. >> >> My patch is a bit of a hammer and makes sure that an ACTION_FAIL >> request is completed as a failure... Am I getting all this wrong ? >> > > Just checked with the code, and send a new patch. > Which I find a bit clearer. > > Fact is that __scsi_error_from_host_byte() is not sufficient to > evaluate the return code, so taking its return value with any further > checks is indeed wrong. > But then it never said so on the boilerplate of > __scsi_error_from_host_byte() ... > > So please do check the 'v2' version of the patch. > It fixes the particular problem I am seeing. But looking more at this code, isn't there a problem with sshdr.sense_key == RECOVERED_ERROR ? if (sense_valid && (sshdr.sense_key == RECOVERED_ERROR)) { ... result = 0; /* for passthrough error may be set */ error = BLK_STS_OK; } Then the error will be changed wrongly to BLK_STS_IOERR. My original
Re: [PATCH] scsi: Fix failed request error code
On Wed, 4 Apr 2018 07:06:58 + Damien Le Moalwrote: > Hannes, > > On 4/4/18 15:57, Hannes Reinecke wrote: > > On Wed, 4 Apr 2018 15:51:38 +0900 > > Damien Le Moal wrote: > > > >> With the introduction of commit e39a97353e53 ("scsi: core: return > >> BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"), a command > >> that failed with hostbyte=DID_OK and driverbyte=DRIVER_SENSE but > >> lacking additional sense information will have a return code set to > >> BLK_STS_OK. This results in the request issuer to see successful > >> request execution despite the failure. An example of such case is > >> an unaligned write on a host managed ZAC disk connected to a SAS > >> HBA with a malfunctioning SAT. The unaligned write command gets > >> aborted but has no additional sense information. > >> > >> sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK > >> driverbyte=DRIVER_SENSE sd 10:0:0:0: [sde] tag#3905 Sense Key : > >> Aborted Command [current] sd 10:0:0:0: [sde] tag#3905 Add. Sense: > >> No additional sense information sd 10:0:0:0: [sde] tag#3905 CDB: > >> Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 00 01 00 00 > >> print_req_error: I/O error, dev sde, sector 274726920 > >> > >> In scsi_io_completion(), sense key handling to not change the > >> request error code and success being reported to the issuer. > >> > >> Fix this by making sure that the error code always indicates an > >> error if scsi_io_completion() decide that the action to be taken > >> for a failed command is to not retry it and terminate it > >> immediately (ACTION_FAIL) . > >> > >> Signed-off-by: Damien Le Moal > >> Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in > >> __scsi_error_from_host_byte()") Cc: Hannes Reinecke > >> Cc: > >> --- > >> drivers/scsi/scsi_lib.c | 9 + > >> 1 file changed, 9 insertions(+) > >> > >> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c > >> index c84f931388f2..87579bfcc186 100644 > >> --- a/drivers/scsi/scsi_lib.c > >> +++ b/drivers/scsi/scsi_lib.c > >> @@ -1002,6 +1002,15 @@ void scsi_io_completion(struct scsi_cmnd > >> *cmd, unsigned int good_bytes) scsi_print_command(cmd); > >>} > >>} > >> + /* > >> + * The command failed and should not be retried. > >> If the host > >> + * byte is DID_OK, then > >> __scsi_error_from_host_byte() returned > >> + * BLK_STS_OK and error indicates a success. Make > >> sure to not > >> + * use that as the completion code and always > >> return an > >> + * I/O error. > >> + */ > >> + if (error == BLK_STS_OK) > >> + error = BLK_STS_IOERR; > >>if (!scsi_end_request(req, error, > >> blk_rq_err_bytes(req), 0)) return; > >>/*FALLTHRU*/ > > > > That looks wrong. > > Shouldn't __scsi_error_from_host_byte() return the correct status > > here? > > My drive said: > > sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK > driverbyte=DRIVER_SENSE > sd 10:0:0:0: [sde] tag#3905 Sense Key : Aborted Command [current] > sd 10:0:0:0: [sde] tag#3905 Add. Sense: No additional sense > information sd 10:0:0:0: [sde] tag#3905 CDB: Write(16) 8a 00 00 00 00 > 00 02 0c 00 01 00 00 00 01 00 00 > > Since hostbyte is DID_OK, __scsi_error_from_host_byte() returns > BLK_STS_OK. The HBA fails to give sense data, so the ABORTED_COMMAND > case in scsi_io_completion() "switch (sshdr.sense_key)" does nothing > and error stays equal to success. scsi_end_request() gets called with > that and dd sees a success... > > There are also plenty of other sense keys cases where error is not > changed despite the fact that error can be BLK_STS_SUCCESS (in fact, I > think this is likely the most common case since an command failure > with hostbyte=DID_OK and driverbyte=DRIVER_SENSE is probably the most > common one. > > My patch is a bit of a hammer and makes sure that an ACTION_FAIL > request is completed as a failure... Am I getting all this wrong ? > Just checked with the code, and send a new patch. Which I find a bit clearer. Fact is that __scsi_error_from_host_byte() is not sufficient to evaluate the return code, so taking its return value with any further checks is indeed wrong. But then it never said so on the boilerplate of __scsi_error_from_host_byte() ... So please do check the 'v2' version of the patch. Cheers, Hannes
[PATCHv2] scsi: Fix failed request error code
With the introduction of commit e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"), a command that failed with hostbyte=DID_OK and driverbyte=DRIVER_SENSE but lacking additional sense information will have a return code set to BLK_STS_OK. This results in the request issuer to see successful request execution despite the failure. An example of such case is an unaligned write on a host managed ZAC disk connected to a SAS HBA with a malfunctioning SAT. The unaligned write command gets aborted but has no additional sense information. sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE sd 10:0:0:0: [sde] tag#3905 Sense Key : Aborted Command [current] sd 10:0:0:0: [sde] tag#3905 Add. Sense: No additional sense information sd 10:0:0:0: [sde] tag#3905 CDB: Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 00 01 00 00 print_req_error: I/O error, dev sde, sector 274726920 In scsi_io_completion(), sense key handling to not change the request error code and success being reported to the issuer. Fix this by making sure that the error code always indicates an error if scsi_io_completion() decide that the action to be taken for a failed command is to not retry it and terminate it immediately (ACTION_FAIL) . Signed-off-by: Damien Le MoalSigned-off-by: Hannes Reinecke Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()") Cc: --- drivers/scsi/scsi_lib.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 393f9db8f41b..9389c41e2829 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -905,6 +905,12 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) goto requeue; error = __scsi_error_from_host_byte(cmd, result); + /* +* If the hostbyte was DID_OK, but the sense code is valid +* we always should set BLK_STS_IOERR. +*/ + if (error == BLK_STS_OK && sense_valid) + error = BLK_STS_IOERR; if (host_byte(result) == DID_RESET) { /* Third party bus reset or reset for error recovery -- 2.12.3
Re: [PATCH] scsi: Fix failed request error code
Hannes, On 4/4/18 15:57, Hannes Reinecke wrote: > On Wed, 4 Apr 2018 15:51:38 +0900 > Damien Le Moalwrote: > >> With the introduction of commit e39a97353e53 ("scsi: core: return >> BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"), a command >> that failed with hostbyte=DID_OK and driverbyte=DRIVER_SENSE but >> lacking additional sense information will have a return code set to >> BLK_STS_OK. This results in the request issuer to see successful >> request execution despite the failure. An example of such case is an >> unaligned write on a host managed ZAC disk connected to a SAS HBA >> with a malfunctioning SAT. The unaligned write command gets aborted >> but has no additional sense information. >> >> sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK >> driverbyte=DRIVER_SENSE sd 10:0:0:0: [sde] tag#3905 Sense Key : >> Aborted Command [current] sd 10:0:0:0: [sde] tag#3905 Add. Sense: No >> additional sense information sd 10:0:0:0: [sde] tag#3905 CDB: >> Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 00 01 00 00 >> print_req_error: I/O error, dev sde, sector 274726920 >> >> In scsi_io_completion(), sense key handling to not change the request >> error code and success being reported to the issuer. >> >> Fix this by making sure that the error code always indicates an error >> if scsi_io_completion() decide that the action to be taken for a >> failed command is to not retry it and terminate it immediately >> (ACTION_FAIL) . >> >> Signed-off-by: Damien Le Moal >> Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in >> __scsi_error_from_host_byte()") Cc: Hannes Reinecke >> Cc: >> --- >> drivers/scsi/scsi_lib.c | 9 + >> 1 file changed, 9 insertions(+) >> >> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c >> index c84f931388f2..87579bfcc186 100644 >> --- a/drivers/scsi/scsi_lib.c >> +++ b/drivers/scsi/scsi_lib.c >> @@ -1002,6 +1002,15 @@ void scsi_io_completion(struct scsi_cmnd *cmd, >> unsigned int good_bytes) scsi_print_command(cmd); >> } >> } >> +/* >> + * The command failed and should not be retried. If >> the host >> + * byte is DID_OK, then >> __scsi_error_from_host_byte() returned >> + * BLK_STS_OK and error indicates a success. Make >> sure to not >> + * use that as the completion code and always return >> an >> + * I/O error. >> + */ >> +if (error == BLK_STS_OK) >> +error = BLK_STS_IOERR; >> if (!scsi_end_request(req, error, >> blk_rq_err_bytes(req), 0)) return; >> /*FALLTHRU*/ > > That looks wrong. > Shouldn't __scsi_error_from_host_byte() return the correct status here? My drive said: sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE sd 10:0:0:0: [sde] tag#3905 Sense Key : Aborted Command [current] sd 10:0:0:0: [sde] tag#3905 Add. Sense: No additional sense information sd 10:0:0:0: [sde] tag#3905 CDB: Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 00 01 00 00 Since hostbyte is DID_OK, __scsi_error_from_host_byte() returns BLK_STS_OK. The HBA fails to give sense data, so the ABORTED_COMMAND case in scsi_io_completion() "switch (sshdr.sense_key)" does nothing and error stays equal to success. scsi_end_request() gets called with that and dd sees a success... There are also plenty of other sense keys cases where error is not changed despite the fact that error can be BLK_STS_SUCCESS (in fact, I think this is likely the most common case since an command failure with hostbyte=DID_OK and driverbyte=DRIVER_SENSE is probably the most common one. My patch is a bit of a hammer and makes sure that an ACTION_FAIL request is completed as a failure... Am I getting all this wrong ? Best. -- Damien Le Moal, Western Digital
Re: [PATCH] scsi: Fix failed request error code
On Wed, 4 Apr 2018 15:51:38 +0900 Damien Le Moalwrote: > With the introduction of commit e39a97353e53 ("scsi: core: return > BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"), a command > that failed with hostbyte=DID_OK and driverbyte=DRIVER_SENSE but > lacking additional sense information will have a return code set to > BLK_STS_OK. This results in the request issuer to see successful > request execution despite the failure. An example of such case is an > unaligned write on a host managed ZAC disk connected to a SAS HBA > with a malfunctioning SAT. The unaligned write command gets aborted > but has no additional sense information. > > sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK > driverbyte=DRIVER_SENSE sd 10:0:0:0: [sde] tag#3905 Sense Key : > Aborted Command [current] sd 10:0:0:0: [sde] tag#3905 Add. Sense: No > additional sense information sd 10:0:0:0: [sde] tag#3905 CDB: > Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 00 01 00 00 > print_req_error: I/O error, dev sde, sector 274726920 > > In scsi_io_completion(), sense key handling to not change the request > error code and success being reported to the issuer. > > Fix this by making sure that the error code always indicates an error > if scsi_io_completion() decide that the action to be taken for a > failed command is to not retry it and terminate it immediately > (ACTION_FAIL) . > > Signed-off-by: Damien Le Moal > Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in > __scsi_error_from_host_byte()") Cc: Hannes Reinecke > Cc: > --- > drivers/scsi/scsi_lib.c | 9 + > 1 file changed, 9 insertions(+) > > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c > index c84f931388f2..87579bfcc186 100644 > --- a/drivers/scsi/scsi_lib.c > +++ b/drivers/scsi/scsi_lib.c > @@ -1002,6 +1002,15 @@ void scsi_io_completion(struct scsi_cmnd *cmd, > unsigned int good_bytes) scsi_print_command(cmd); > } > } > + /* > + * The command failed and should not be retried. If > the host > + * byte is DID_OK, then > __scsi_error_from_host_byte() returned > + * BLK_STS_OK and error indicates a success. Make > sure to not > + * use that as the completion code and always return > an > + * I/O error. > + */ > + if (error == BLK_STS_OK) > + error = BLK_STS_IOERR; > if (!scsi_end_request(req, error, > blk_rq_err_bytes(req), 0)) return; > /*FALLTHRU*/ That looks wrong. Shouldn't __scsi_error_from_host_byte() return the correct status here? Cheers, Hannes
[PATCH] scsi: Fix failed request error code
With the introduction of commit e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"), a command that failed with hostbyte=DID_OK and driverbyte=DRIVER_SENSE but lacking additional sense information will have a return code set to BLK_STS_OK. This results in the request issuer to see successful request execution despite the failure. An example of such case is an unaligned write on a host managed ZAC disk connected to a SAS HBA with a malfunctioning SAT. The unaligned write command gets aborted but has no additional sense information. sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE sd 10:0:0:0: [sde] tag#3905 Sense Key : Aborted Command [current] sd 10:0:0:0: [sde] tag#3905 Add. Sense: No additional sense information sd 10:0:0:0: [sde] tag#3905 CDB: Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 00 01 00 00 print_req_error: I/O error, dev sde, sector 274726920 In scsi_io_completion(), sense key handling to not change the request error code and success being reported to the issuer. Fix this by making sure that the error code always indicates an error if scsi_io_completion() decide that the action to be taken for a failed command is to not retry it and terminate it immediately (ACTION_FAIL) . Signed-off-by: Damien Le MoalFixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()") Cc: Hannes Reinecke Cc: --- drivers/scsi/scsi_lib.c | 9 + 1 file changed, 9 insertions(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index c84f931388f2..87579bfcc186 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1002,6 +1002,15 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) scsi_print_command(cmd); } } + /* +* The command failed and should not be retried. If the host +* byte is DID_OK, then __scsi_error_from_host_byte() returned +* BLK_STS_OK and error indicates a success. Make sure to not +* use that as the completion code and always return an +* I/O error. +*/ + if (error == BLK_STS_OK) + error = BLK_STS_IOERR; if (!scsi_end_request(req, error, blk_rq_err_bytes(req), 0)) return; /*FALLTHRU*/ -- 2.14.3