Re: [PATCH] tcmu: allow userspace to reset netlink

2018-04-04 Thread Mike Christie
On 04/04/2018 09:38 PM, Xiubo Li wrote:
> On 2018/4/5 8:47, Mike Christie wrote:
>> On 04/02/2018 06:42 AM, xiu...@redhat.com wrote:
>>> From: Xiubo Li 
>>>
>>> This patch adds 1 tcmu attr to reset and complete all the blocked
>>> netlink waiting threads. It's used when the userspace daemon like
>>> tcmu-runner has crashed or forced to shutdown just before the
>>> netlink requests be replied to the kernel, then the netlink requeting
>>> threads will get stuck forever. We must reboot the machine to recover
>>> from it and by this the rebootng is not a must then.
>>>
>>> The netlink reset operation should be done before the userspace daemon
>>> could receive and handle the netlink requests to be safe.
>>>
>>> Signed-off-by: Xiubo Li 
>>> ---
>>>   drivers/target/target_core_user.c | 99
>>> ---
>>>   1 file changed, 93 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/target/target_core_user.c
>>> b/drivers/target/target_core_user.c
>>> index 4ad89ea..dc8879d 100644
>>> --- a/drivers/target/target_core_user.c
>>> +++ b/drivers/target/target_core_user.c
>>> @@ -103,9 +103,13 @@ struct tcmu_hba {
>>> #define TCMU_CONFIG_LEN 256
>>>   +static spinlock_t nl_complete_lock;
>>> +static struct idr complete_wait_udevs = IDR_INIT;
>>> +
>>>   struct tcmu_nl_cmd {
>>>   /* wake up thread waiting for reply */
>>> -struct completion complete;
>>> +bool complete;
>>> +
>>>   int cmd;
>>>   int status;
>>>   };
>>> @@ -159,12 +163,17 @@ struct tcmu_dev {
>>> spinlock_t nl_cmd_lock;
>>>   struct tcmu_nl_cmd curr_nl_cmd;
>>> -/* wake up threads waiting on curr_nl_cmd */
>>> +/* wake up threads waiting on nl_cmd_wq */
>>>   wait_queue_head_t nl_cmd_wq;
>>>   +/* complete thread waiting complete_wq */
>>> +wait_queue_head_t complete_wq;
>>> +
>>>   char dev_config[TCMU_CONFIG_LEN];
>>> int nl_reply_supported;
>>> +
>>> +uint32_t dev_id;
>>>   };
>>> #define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev,
>>> se_dev)
>>> @@ -251,6 +260,56 @@ static int tcmu_get_global_max_data_area(char
>>> *buffer,
>>>"Max MBs allowed to be allocated to all the tcmu device's "
>>>"data areas.");
>>>   +static void tcmu_complete_wake_up(struct tcmu_dev *udev)
>>> +{
>>> +struct tcmu_nl_cmd *nl_cmd = >curr_nl_cmd;
>>> +
>>> +spin_lock(_complete_lock);
>>> +nl_cmd->complete = true;
>>> +wake_up(>complete_wq);
>>> +spin_unlock(_complete_lock);
>>> +}
>>> +
>>> +static void tcmu_complete_wake_up_all(void)
>>> +{
>>> +struct tcmu_nl_cmd *nl_cmd;
>>> +struct tcmu_dev *udev;
>>> +int i;
>>> +
>>> +spin_lock(_complete_lock);
>>> +idr_for_each_entry(_wait_udevs, udev, i) {
>>> +nl_cmd = >curr_nl_cmd;
>>> +nl_cmd->complete = true;
>>> +wake_up(>complete_wq);
>>> +}
>>> +spin_unlock(_complete_lock);
>>> +}
>>> +
>>> +static int tcmu_complete_wait(struct tcmu_dev *udev)
>>> +{
>>> +struct tcmu_nl_cmd *nl_cmd = >curr_nl_cmd;
>>> +uint32_t dev_id;
>>> +
>>> +spin_lock(_complete_lock);
>>> +dev_id = idr_alloc(_wait_udevs, udev, 1, USHRT_MAX,
>>> GFP_NOWAIT);
>>> +if (dev_id < 0) {
>>> +pr_err("tcmu: Could not allocate dev id.\n");
>>> +return dev_id;
>>> +}
>>> +udev->dev_id = dev_id;
>> dev_id is never used.
> It will be used when the device is being removed.

Ah yeah, you are right. Bad comment on my part. I was thinking/writing
wrt if we used a list or helper around devices_idr.

> 
>> I think if you just wanted to loop over all the devices you could just
>> use a list.
>>
>> Or,
>>
>> Just add a helper around target_core_device.c:devices_idr that just
>> gives you the tcmu devices.
>>
>>
>>
>>> +spin_unlock(_complete_lock);
>>> +
>>> +pr_debug("sleeping for nl reply\n");
>>> +wait_event(udev->complete_wq, nl_cmd->complete);
>> I don't think you will need the complete field then or this function.
>>
>>
>>> +
>>> +spin_lock(_complete_lock);
>>> +nl_cmd->complete = false;
>>> +idr_remove(_wait_udevs, dev_id);
>>> +spin_unlock(_complete_lock);
>>> +
>>> +return 0;
>>> +}
>>> +
>>>   /* multicast group */
>>>   enum tcmu_multicast_groups {
>>>   TCMU_MCGRP_CONFIG,
>>> @@ -311,7 +370,7 @@ static int tcmu_genl_cmd_done(struct genl_info
>>> *info, int completed_cmd)
>>>   if (!is_removed)
>>>target_undepend_item(>dev_group.cg_item);
>>>   if (!ret)
>>> -complete(_cmd->complete);
>>> +tcmu_complete_wake_up(udev);
>>>   return ret;
>>>   }
>>>   @@ -1258,6 +1317,7 @@ static struct se_device
>>> *tcmu_alloc_device(struct se_hba *hba, const char *name)
>>>   timer_setup(>cmd_timer, tcmu_cmd_timedout, 0);
>>> init_waitqueue_head(>nl_cmd_wq);
>>> +init_waitqueue_head(>complete_wq);
>>>   spin_lock_init(>nl_cmd_lock);
>>> INIT_RADIX_TREE(>data_blocks, GFP_KERNEL);

Re: [PATCH] tcmu: allow userspace to reset netlink

2018-04-04 Thread Xiubo Li

On 2018/4/5 8:47, Mike Christie wrote:

On 04/02/2018 06:42 AM, xiu...@redhat.com wrote:

From: Xiubo Li 

This patch adds 1 tcmu attr to reset and complete all the blocked
netlink waiting threads. It's used when the userspace daemon like
tcmu-runner has crashed or forced to shutdown just before the
netlink requests be replied to the kernel, then the netlink requeting
threads will get stuck forever. We must reboot the machine to recover
from it and by this the rebootng is not a must then.

The netlink reset operation should be done before the userspace daemon
could receive and handle the netlink requests to be safe.

Signed-off-by: Xiubo Li 
---
  drivers/target/target_core_user.c | 99 ---
  1 file changed, 93 insertions(+), 6 deletions(-)

diff --git a/drivers/target/target_core_user.c 
b/drivers/target/target_core_user.c
index 4ad89ea..dc8879d 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -103,9 +103,13 @@ struct tcmu_hba {
  
  #define TCMU_CONFIG_LEN 256
  
+static spinlock_t nl_complete_lock;

+static struct idr complete_wait_udevs = IDR_INIT;
+
  struct tcmu_nl_cmd {
/* wake up thread waiting for reply */
-   struct completion complete;
+   bool complete;
+
int cmd;
int status;
  };
@@ -159,12 +163,17 @@ struct tcmu_dev {
  
  	spinlock_t nl_cmd_lock;

struct tcmu_nl_cmd curr_nl_cmd;
-   /* wake up threads waiting on curr_nl_cmd */
+   /* wake up threads waiting on nl_cmd_wq */
wait_queue_head_t nl_cmd_wq;
  
+	/* complete thread waiting complete_wq */

+   wait_queue_head_t complete_wq;
+
char dev_config[TCMU_CONFIG_LEN];
  
  	int nl_reply_supported;

+
+   uint32_t dev_id;
  };
  
  #define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev, se_dev)

@@ -251,6 +260,56 @@ static int tcmu_get_global_max_data_area(char *buffer,
 "Max MBs allowed to be allocated to all the tcmu device's "
 "data areas.");
  
+static void tcmu_complete_wake_up(struct tcmu_dev *udev)

+{
+   struct tcmu_nl_cmd *nl_cmd = >curr_nl_cmd;
+
+   spin_lock(_complete_lock);
+   nl_cmd->complete = true;
+   wake_up(>complete_wq);
+   spin_unlock(_complete_lock);
+}
+
+static void tcmu_complete_wake_up_all(void)
+{
+   struct tcmu_nl_cmd *nl_cmd;
+   struct tcmu_dev *udev;
+   int i;
+
+   spin_lock(_complete_lock);
+   idr_for_each_entry(_wait_udevs, udev, i) {
+   nl_cmd = >curr_nl_cmd;
+   nl_cmd->complete = true;
+   wake_up(>complete_wq);
+   }
+   spin_unlock(_complete_lock);
+}
+
+static int tcmu_complete_wait(struct tcmu_dev *udev)
+{
+   struct tcmu_nl_cmd *nl_cmd = >curr_nl_cmd;
+   uint32_t dev_id;
+
+   spin_lock(_complete_lock);
+   dev_id = idr_alloc(_wait_udevs, udev, 1, USHRT_MAX, 
GFP_NOWAIT);
+   if (dev_id < 0) {
+   pr_err("tcmu: Could not allocate dev id.\n");
+   return dev_id;
+   }
+   udev->dev_id = dev_id;

dev_id is never used.

It will be used when the device is being removed.


I think if you just wanted to loop over all the devices you could just
use a list.

Or,

Just add a helper around target_core_device.c:devices_idr that just
gives you the tcmu devices.




+   spin_unlock(_complete_lock);
+
+   pr_debug("sleeping for nl reply\n");
+   wait_event(udev->complete_wq, nl_cmd->complete);

I don't think you will need the complete field then or this function.



+
+   spin_lock(_complete_lock);
+   nl_cmd->complete = false;
+   idr_remove(_wait_udevs, dev_id);
+   spin_unlock(_complete_lock);
+
+   return 0;
+}
+
  /* multicast group */
  enum tcmu_multicast_groups {
TCMU_MCGRP_CONFIG,
@@ -311,7 +370,7 @@ static int tcmu_genl_cmd_done(struct genl_info *info, int 
completed_cmd)
if (!is_removed)
 target_undepend_item(>dev_group.cg_item);
if (!ret)
-   complete(_cmd->complete);
+   tcmu_complete_wake_up(udev);
return ret;
  }
  
@@ -1258,6 +1317,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)

timer_setup(>cmd_timer, tcmu_cmd_timedout, 0);
  
  	init_waitqueue_head(>nl_cmd_wq);

+   init_waitqueue_head(>complete_wq);
spin_lock_init(>nl_cmd_lock);
  
  	INIT_RADIX_TREE(>data_blocks, GFP_KERNEL);

@@ -1462,7 +1522,11 @@ static void tcmu_dev_call_rcu(struct rcu_head *p)
  
  	kfree(udev->uio_info.name);

kfree(udev->name);
+
+   spin_lock(_complete_lock);
+   idr_remove(_wait_udevs, udev->dev_id);
kfree(udev);
+   spin_unlock(_complete_lock);
  }
  
  static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)

@@ -1555,7 +1619,6 @@ static void tcmu_init_genl_cmd_reply(struct tcmu_dev 
*udev, int cmd)
  
  	memset(nl_cmd, 0, sizeof(*nl_cmd));


Re: [PATCH v2] Fix DID_OK handling in __scsi_error_from_host_byte()

2018-04-04 Thread Damien Le Moal
On Wed, 2018-04-04 at 10:53 -0700, Bart Van Assche wrote:
> Commit e39a97353e53 modified __scsi_error_from_host_byte() such
> that that function translates DID_OK into BLK_STS_OK. However,
> the description of that commit is wrong: it mentions that commit
> 2a842acab109 introduced a bug in __scsi_error_from_host_byte()
> although that commit did not change the behavior of that function.
> Additionally, commit e39a97353e53 introduced a severe bug: it causes
> commands that fail with hostbyte=DID_OK and driverbyte=DRIVER_SENSE
> to be completed with BLK_STS_OK. Fix __scsi_error_from_host_byte()
> by only translating good status values into BLK_STS_OK.
> 
> Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in
> __scsi_error_from_host_byte()")
> Reported-by: Damien Le Moal 
> Signed-off-by: Bart Van Assche 
> Cc: Hannes Reinecke 
> Cc: Douglas Gilbert 
> Cc: Damien Le Moal 
> Cc: Christoph Hellwig 
> Cc: sta...@vger.kernel.org
> ---
> 
> Changes compared to v1:
> - Modified __scsi_error_from_host_byte() such that it again returns
>   BLK_STS_OK for CONDITION MET and other result codes that represent
>   success.
> 
>  drivers/scsi/scsi_lib.c | 8 +++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index 74a39db57d49..1496b34af409 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -736,7 +736,13 @@ static blk_status_t __scsi_error_from_host_byte(struct
> scsi_cmnd *cmd,
>  {
>   switch (host_byte(result)) {
>   case DID_OK:
> - return BLK_STS_OK;
> + /*
> +  * Also check the other bytes than the status byte in
> result
> +  * to handle the case when a SCSI LLD sets result to
> +  * DRIVER_SENSE << 24 without setting
> SAM_STAT_CHECK_CONDITION.
> +  */
> + return scsi_status_is_good(result) && (result & ~0xff) == 0
> ?
> + BLK_STS_OK : BLK_STS_IOERR;

This fixes the problem on my system.

Tested-by: Damien Le Moal 

-- 
Damien Le Moal
Western Digital

Re: [PATCH] tcmu: allow userspace to reset netlink

2018-04-04 Thread Mike Christie
On 04/02/2018 06:42 AM, xiu...@redhat.com wrote:
> From: Xiubo Li 
> 
> This patch adds 1 tcmu attr to reset and complete all the blocked
> netlink waiting threads. It's used when the userspace daemon like
> tcmu-runner has crashed or forced to shutdown just before the
> netlink requests be replied to the kernel, then the netlink requeting
> threads will get stuck forever. We must reboot the machine to recover
> from it and by this the rebootng is not a must then.
> 
> The netlink reset operation should be done before the userspace daemon
> could receive and handle the netlink requests to be safe.
> 
> Signed-off-by: Xiubo Li 
> ---
>  drivers/target/target_core_user.c | 99 
> ---
>  1 file changed, 93 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/target/target_core_user.c 
> b/drivers/target/target_core_user.c
> index 4ad89ea..dc8879d 100644
> --- a/drivers/target/target_core_user.c
> +++ b/drivers/target/target_core_user.c
> @@ -103,9 +103,13 @@ struct tcmu_hba {
>  
>  #define TCMU_CONFIG_LEN 256
>  
> +static spinlock_t nl_complete_lock;
> +static struct idr complete_wait_udevs = IDR_INIT;
> +
>  struct tcmu_nl_cmd {
>   /* wake up thread waiting for reply */
> - struct completion complete;
> + bool complete;
> +
>   int cmd;
>   int status;
>  };
> @@ -159,12 +163,17 @@ struct tcmu_dev {
>  
>   spinlock_t nl_cmd_lock;
>   struct tcmu_nl_cmd curr_nl_cmd;
> - /* wake up threads waiting on curr_nl_cmd */
> + /* wake up threads waiting on nl_cmd_wq */
>   wait_queue_head_t nl_cmd_wq;
>  
> + /* complete thread waiting complete_wq */
> + wait_queue_head_t complete_wq;
> +
>   char dev_config[TCMU_CONFIG_LEN];
>  
>   int nl_reply_supported;
> +
> + uint32_t dev_id;
>  };
>  
>  #define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev, se_dev)
> @@ -251,6 +260,56 @@ static int tcmu_get_global_max_data_area(char *buffer,
>"Max MBs allowed to be allocated to all the tcmu device's "
>"data areas.");
>  
> +static void tcmu_complete_wake_up(struct tcmu_dev *udev)
> +{
> + struct tcmu_nl_cmd *nl_cmd = >curr_nl_cmd;
> +
> + spin_lock(_complete_lock);
> + nl_cmd->complete = true;
> + wake_up(>complete_wq);
> + spin_unlock(_complete_lock);
> +}
> +
> +static void tcmu_complete_wake_up_all(void)
> +{
> + struct tcmu_nl_cmd *nl_cmd;
> + struct tcmu_dev *udev;
> + int i;
> +
> + spin_lock(_complete_lock);
> + idr_for_each_entry(_wait_udevs, udev, i) {
> + nl_cmd = >curr_nl_cmd;
> + nl_cmd->complete = true;
> + wake_up(>complete_wq);
> + }
> + spin_unlock(_complete_lock);
> +}
> +
> +static int tcmu_complete_wait(struct tcmu_dev *udev)
> +{
> + struct tcmu_nl_cmd *nl_cmd = >curr_nl_cmd;
> + uint32_t dev_id;
> +
> + spin_lock(_complete_lock);
> + dev_id = idr_alloc(_wait_udevs, udev, 1, USHRT_MAX, 
> GFP_NOWAIT);
> + if (dev_id < 0) {
> + pr_err("tcmu: Could not allocate dev id.\n");
> + return dev_id;
> + }
> + udev->dev_id = dev_id;

dev_id is never used.

I think if you just wanted to loop over all the devices you could just
use a list.

Or,

Just add a helper around target_core_device.c:devices_idr that just
gives you the tcmu devices.



> + spin_unlock(_complete_lock);
> +
> + pr_debug("sleeping for nl reply\n");
> + wait_event(udev->complete_wq, nl_cmd->complete);

I don't think you will need the complete field then or this function.


> +
> + spin_lock(_complete_lock);
> + nl_cmd->complete = false;
> + idr_remove(_wait_udevs, dev_id);
> + spin_unlock(_complete_lock);
> +
> + return 0;
> +}
> +
>  /* multicast group */
>  enum tcmu_multicast_groups {
>   TCMU_MCGRP_CONFIG,
> @@ -311,7 +370,7 @@ static int tcmu_genl_cmd_done(struct genl_info *info, int 
> completed_cmd)
>   if (!is_removed)
>target_undepend_item(>dev_group.cg_item);
>   if (!ret)
> - complete(_cmd->complete);
> + tcmu_complete_wake_up(udev);
>   return ret;
>  }
>  
> @@ -1258,6 +1317,7 @@ static struct se_device *tcmu_alloc_device(struct 
> se_hba *hba, const char *name)
>   timer_setup(>cmd_timer, tcmu_cmd_timedout, 0);
>  
>   init_waitqueue_head(>nl_cmd_wq);
> + init_waitqueue_head(>complete_wq);
>   spin_lock_init(>nl_cmd_lock);
>  
>   INIT_RADIX_TREE(>data_blocks, GFP_KERNEL);
> @@ -1462,7 +1522,11 @@ static void tcmu_dev_call_rcu(struct rcu_head *p)
>  
>   kfree(udev->uio_info.name);
>   kfree(udev->name);
> +
> + spin_lock(_complete_lock);
> + idr_remove(_wait_udevs, udev->dev_id);
>   kfree(udev);
> + spin_unlock(_complete_lock);
>  }
>  
>  static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
> @@ -1555,7 +1619,6 @@ static void tcmu_init_genl_cmd_reply(struct tcmu_dev 
> *udev, 

Re: 4.15.14 crash with iscsi target and dvd

2018-04-04 Thread Wakko Warner
Bart Van Assche wrote:
> On Sun, 2018-04-01 at 14:27 -0400, Wakko Warner wrote:
> > Wakko Warner wrote:
> > > Wakko Warner wrote:
> > > > I tested 4.14.32 last night with the same oops.  4.9.91 works fine.
> > > > From the initiator, if I do cat /dev/sr1 > /dev/null it works.  If I 
> > > > mount
> > > > /dev/sr1 and then do find -type f | xargs cat > /dev/null the target
> > > > crashes.  I'm using the builtin iscsi target with pscsi.  I can burn 
> > > > from
> > > > the initiator with out problems.  I'll test other kernels between 4.9 
> > > > and
> > > > 4.14.
> > > 
> > > So I've tested 4.x.y where x one of 10 11 12 14 15 and y is the latest 
> > > patch
> > > (except for 4.15 which was 1 behind)
> > > Each of these kernels crash within seconds or immediate of doing find 
> > > -type
> > > f | xargs cat > /dev/null from the initiator.
> > 
> > I tried 4.10.0.  It doesn't completely lockup the system, but the device
> > that was used hangs.  So from the initiator, it's /dev/sr1 and from the
> > target it's /dev/sr0.  Attempting to read /dev/sr0 after the oops causes the
> > process to hang in D state.
> 
> Hello Wakko,
> 
> Thank you for having narrowed down this further. I think that you encountered
> a regression either in the block layer core or in the SCSI core. Unfortunately
> the number of changes between kernel versions v4.9 and v4.10 in these two
> subsystems is huge. I see two possible ways forward:
> - Either that you perform a bisect to identify the patch that introduced this
>   regression. However, I'm not sure whether you are familiar with the bisect
>   process.
> - Or that you identify the command that triggers this crash such that others
>   can reproduce this issue without needing access to your setup.
> 
> How about reproducing this crash with the below patch applied on top of
> kernel v4.15.x? The additional output sent by this patch to the system log
> should allow us to reproduce this issue by submitting the same SCSI command
> with sg_raw.

Sorry for not getting back in touch.  My internet was down.  I haven't tried
the patch yet.  I'll try to get to that tomorrow.  The system with the issue
is busy and I can't reboot it right now.


Re: [PATCH 2/2] sd_zbc: Avoid errors due to sd_zbc_setup() execution

2018-04-04 Thread Damien Le Moal
Bart,

On 4/5/18 00:22, Bart Van Assche wrote:
> On Wed, 2018-04-04 at 17:54 +0900, Damien Le Moal wrote:
>> Since SCSI scanning occurs asynchronously, since sd_revalidate_disk()
>> is called from sd_probe_async() and since sd_revalidate_disk() calls
>> sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called
>> concurrently with operations referencing a drive zone bitmaps and number
>
> 
> Should "a" be changed into "the"?

Yes.


>> [Damien] Updated commit message and changed nr_zones/bitmap swap order.
> 
> Updating the number of zones after having updated the bitmap pointers is not
> sufficient to avoid trouble if the number of zones as reported by the drive
> changes while I/O is in progress. With the current implementation if the
> number of zones changes the seq_zones_bitmap is cleared. Can this cause
> trouble for the mq-deadline scheduler? Additionally, CPUs other than x86 can
> reorder store operations. Even worse, a CPU could cache the zone bitmap
> pointers which means that at least RCU protection + kfree_rcu() is needed to
> avoid trouble. I think we either should handle this case properly or issue a
> kernel warning.

OK. Let's work on that.


-- 
Damien Le Moal,
Western Digital

Re: [PATCH 1/2] sd_zbc: Avoid errors due to sd_zbc_check_zone_size() execution

2018-04-04 Thread Damien Le Moal
Bart,

On 4/5/18 00:08, Bart Van Assche wrote:
> On 04/04/18 01:54, Damien Le Moal wrote:
>>   static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
>>   {
>> +u64 sdkp_zone_blocks = sdkp->zone_blocks;
> 
> Shouldn't this variable be initialized to zero such that zone size 
> changes are accepted even if the SAME field in the REPORT ZONES response 
> is zero?

sdkp_zone_blocks will be 0 when sd_zbc_check_zone_size() is called for
the first scan of a disk and will hold the current disk value if
sd_zbc_check_zone_size() is called on a revalidate after first scan. If
the initial value is 0, there is no check and the variable is first
initialized. Otherwise, the value is compared to the zone size reported.
In both cases, the zone size change will be cought.

But granted, setting the value initially to 0 is easier to understand.
I will also change:

} else {
sdkp->zone_blocks = zone_blocks;

sdkp->zone_shift = ilog2(zone_blocks);

}

to

} else if (sdkp->zone_blocks != zone_blocks) {

sdkp->zone_blocks = zone_blocks;

sdkp->zone_shift = ilog2(zone_blocks);

}

to make things really clear.

Similarly to a capacity change, It may also be good to add a warning
message. After all, on a drive swap, we can have the case where the
capacity does not change, but the zone size does.

Sending a v2.

Best regards.

-- 
Damien Le Moal,
Western Digital

[GIT PULL] first round of SCSI updates for the 4.16+ merge window

2018-04-04 Thread James Bottomley
This is mostly updates of the usual drivers: arcmsr, qla2xx, lpfc, ufs,
mpt3sas, hisi_sas.  In addition we have removed several really old
drivers: sym53c416, NCR53c406a, fdomain, fdomain_cs and removed the old
scsi_module.c initialization from all remaining drivers.  Plus an
assortment of bug fixes, initialization errors and other minor fixes.

This time there's a really nasty merge between the fixes and misc
branches of the SCSI tree which I've resolved in the merge commit: the
non obvious part is that you have to remove some lines from
qla2xxx/qla_gs.c to avoid a compile failure which would cause bisection
 problems, so I've done that and documented it in the merge commit.  If
you'd like to do it yourself, let me know and I'll send the two
branches separately.

The patch is available here:

git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git scsi-for-linus

The short changelog is:

Adrian Hunter (1):
  scsi: ufs: Add support for Auto-Hibernate Idle Timer

Arnd Bergmann (7):
  scsi: mpt3sas: clarify mmio pointer types
  scsi: qedi: fix build regression
  scsi: sym53c416: avoid section mismatch with LTO
  scsi: NCR53c406a: avoid section mismatch with LTO
  scsi: qedf: use correct strncpy() size
  scsi: qedf: fix LTO-enabled build
  scsi: qedi: fix building with LTO

Bart Van Assche (15):
  scsi: ufs: Fix kernel-doc errors and warnings
  scsi: sd_zbc: Fix sd_zbc_get_seq_zones() kernel-doc header
  scsi: libsas: Fix kernel-doc headers
  scsi: core: Reduce number of scsi_test_unit_ready() retries
  scsi: core: Move the eh_deadline module parameter definition
  scsi: core: scmd_eh_abort_handler(): Add a comment
  scsi: pmcraid: Use sgl_alloc_order() and sgl_free_order()
  scsi: pmcraid: Remove an unused structure member
  scsi: ipr: Use sgl_alloc_order() and sgl_free_order()
  scsi: scsi_debug: Simplify request tag decoding
  scsi: qla2xxx: Fix function argument descriptions
  scsi: qla4xxx: Move an array from a .h into a .c file
  scsi: qla4xxx: Remove unused symbols
  scsi: qla2xxx: Remove unused symbols
  scsi: qla2xxx: Use %p for printing pointers

Ching Huang (4):
  scsi: arcmsr: Change driver version to v1.40.00.05-20180309
  scsi: arcmsr: Sleep to avoid CPU stuck too long for waiting adapter ready
  scsi: arcmsr: Handle adapter removed due to thunderbolt cable 
disconnection.
  scsi: arcmsr: Rename ACB_F_BUS_HANG_ON to ACB_F_ADAPTER_REMOVED for 
adapter hot-plug

Christoph Hellwig (11):
  scsi: remove the old scsi_module.c initialization model
  scsi: remove the sym53c416 driver
  scsi: remove the NCR53c406a driver
  scsi: remove the fdomain and fdomain_cs drivers
  scsi: mvme147: stop using scsi_module.c
  scsi: esas2r: remove initialization / cleanup dead wood
  scsi: core: unexport scsi_host_set_state
  scsi: documentation: remove ChangeLog.1992-1997
  scsi: aha1740: stop using scsi_unregister
  scsi: ips: don't set .detect and .release in the host template
  scsi: dpt_i2o: stop using scsi_unregister

Colin Ian King (7):
  scsi: qla2xxx: fix spelling mistake: "existant" -> "existent"
  scsi: lpfc: make several unions static, fix non-ANSI prototype
  scsi: scsi_transport_spi: make two const arrays static, shrinks object 
size
  scsi: pmcraid: remove redundant initializations of pointer 'ioadl'
  scsi: isci: remove redundant initialization to 'bit'
  scsi: libfc: remove redundant initialization of 'disc'
  scsi: qedf: remove redundant initialization of 'fcport'

Dan Carpenter (3):
  scsi: dpt_i2o: use after free in __adpt_reset()
  scsi: dpt_i2o: use after free in adpt_release()
  scsi: atp870u: 64 bit bug in atp885_init()

Darren Trapp (10):
  scsi: qla2xxx: Cleanup code to improve FC-NVMe error handling
  scsi: qla2xxx: Fix FC-NVMe IO abort during driver reset
  scsi: qla2xxx: Fix retry for PRLI RJT with reason of BUSY
  scsi: qla2xxx: Remove nvme_done_list
  scsi: qla2xxx: Return busy if rport going away
  scsi: qla2xxx: Fix n2n_ae flag to prevent dev_loss on PDB change
  scsi: qla2xxx: Add FC-NVMe abort processing
  scsi: qla2xxx: Add changes for devloss timeout in driver
  scsi: qla2xxx: Set IIDMA and fcport state before 
qla_nvme_register_remote()
  scsi: qla2xxx: Restore ZIO threshold setting

Don Brace (1):
  scsi: smartpqi: update driver version

Douglas Gilbert (2):
  scsi: core: Make SCSI Status CONDITION MET equivalent to GOOD
  scsi: scsi_debug: implement IMMED bit

Finn Thain (1):
  scsi: jazz_esp, sun3x_esp: Pass struct device pointer in dma calls

Geert Uytterhoeven (1):
  scsi: hisi_sas: Remove depends on HAS_DMA in case of platform dependency

Hannes Reinecke (1):
  scsi: raid_class: Add 'JBOD' RAID level

James Smart (43):
  scsi: lpfc: Change Copyright of 12.0.0.1 modified files to 2018
  scsi: lpfc: update driver 

RE: aacraid code passes GFP_DMA32 to kmalloc this will not work

2018-04-04 Thread Dave Carroll
> -Original Message-
> From: Hans de Goede [mailto:hdego...@redhat.com]
> Sent: Thursday, March 29, 2018 8:51 AM
> To: dl-esc-Aacraid Linux Driver ; James E.J.
> Bottomley ; Martin K. Petersen
> ; SCSI development list  s...@vger.kernel.org>; Linux Kernel Mailing List 
> 
> Subject: aacraid code passes GFP_DMA32 to kmalloc this will not work
> 
> 
> Hi All,
> 
> Since I made the same mistake myself I've done a quick grep for
> GFP_DMA32 in the kernel and drivers/scsi/aacraid/commctrl.c came up as a
> result of this grep, it does:
> 
>  p = kmalloc(sg_count[i], 
> GFP_KERNEL|GFP_DMA32);
> 
> But kmalloc always returns memory from the normal memory-zone, if you need
> memory from a specific memory-zone like the
> DMA32 zone, you must use the dma allocation functions (which from a quick
> glance at the code seems appropriate here) or directly call alloc_page or
> __get_free_page.

Hi Hans,

I did run into this issue myself recently ... Thanks for a reminder to send in 
a patch to
correct this.

-Dave
> 
> Regards,
> 
> Hans


Re: usercopy whitelist woe in scsi_sense_cache

2018-04-04 Thread Oleksandr Natalenko

Hi.

04.04.2018 23:25, Kees Cook wrote:

Actually, I can trigger a BUG too:

[  129.259213] usercopy: Kernel memory exposure attempt detected from 
SLUB

object 'scsi_sense_cache' (offset 119, size 22)!


Wow, yeah, that's totally outside the slub object_size. How did you
trigger this? Just luck or something specific?


Just luck, I suppose. It usually comes after the first warning if you 
wait long enough (maybe, a couple of extra minutes).


To give you an idea regarding variety of offsets, I've summarised kernel 
log from the server:


$ sudo journalctl -kb | grep "Kernel memory exposure attempt detected" | 
grep -oE 'offset [0-9]+, size [0-9]+' | sort | uniq -c

  9 offset 107, size 22
  6 offset 108, size 22
  8 offset 109, size 22
  7 offset 110, size 22
  5 offset 111, size 22
  5 offset 112, size 22
  2 offset 113, size 22
  2 offset 114, size 22
  1 offset 115, size 22
  1 offset 116, size 22
  1 offset 119, size 22
  1 offset 85, size 22


I'd really like to understand how the buffer position can be
changing... I'd expect that to break all kinds of things (i.e.
free()ing the slab later would break too...)


I haven't checked the code yet, but the first thing that comes to my 
mind is some uninitialised variable. Just guessing here, though.



Thanks for the report! I hope someone more familiar with sg_io() can
help explain the changing buffer offset... :P


Hopefully, SCSI people are Cc'ed here properly…

Thanks!

Regards,
  Oleksandr


Re: usercopy whitelist woe in scsi_sense_cache

2018-04-04 Thread Kees Cook
On Wed, Apr 4, 2018 at 1:49 PM, Oleksandr Natalenko
 wrote:
> Hi.
>
> On středa 4. dubna 2018 22:21:53 CEST Kees Cook wrote:
>>
>> ...
>> That means scsi_sense_cache should be 96 bytes in size? But a 22 byte
>> read starting at offset 94 happened? That seems like a 20 byte read
>> beyond the end of the SLUB object? Though if it were reading past the
>> actual end of the object, I'd expect the hardened usercopy BUG (rather
>> than the WARN) to kick in. Ah, it looks like
>> /sys/kernel/slab/scsi_sense_cache/slab_size shows this to be 128 bytes
>> of actual allocation, so the 20 bytes doesn't strictly overlap another
>> object (hence no BUG):
>> ...
>
>
> Actually, I can trigger a BUG too:
>
> [  129.259213] usercopy: Kernel memory exposure attempt detected from SLUB
> object 'scsi_sense_cache' (offset 119, size 22)!

Wow, yeah, that's totally outside the slub object_size. How did you
trigger this? Just luck or something specific?

> [  129.265167] [ cut here ]
> [  129.267579] kernel BUG at mm/usercopy.c:100!
>
> And also offset can be different, as you may see:
>
> [   55.993224] Bad or missing usercopy whitelist? Kernel memory exposure
> attempt detected from SLUB object 'scsi_sense_cache' (offset 76, size 22)!
> [   55.998678] WARNING: CPU: 0 PID: 1305 at mm/usercopy.c:81 usercopy_warn
> +0x7e/0xa0
>
> It looks like only the size stays the same.

I'd really like to understand how the buffer position can be
changing... I'd expect that to break all kinds of things (i.e.
free()ing the slab later would break too...)

>> Can you send me your .config? What SCSI drivers are you using in the
>> VM and on the real server?
>
> This is an Arch kernel with a config available here [1].
>
> For both server and VM "lspci -vv" shows "ahci" in use. Is this what you are
> asking for?

I think so, yeah.

>> Are you able to see what ioctl()s smartctl is issuing? I'll try to
>> reproduce this on my end...
>
> As per [2], strace shows "SG_IO" requests. Is this detailed enough?

That's useful, yeah. I'll try Douglas's suggestion of "smartctl -r
scsiioctl,3 ..." too.

> Thanks for looking into it.

Thanks for the report! I hope someone more familiar with sg_io() can
help explain the changing buffer offset... :P

-Kees

-- 
Kees Cook
Pixel Security


Re: usercopy whitelist woe in scsi_sense_cache

2018-04-04 Thread Oleksandr Natalenko

Hi.

On středa 4. dubna 2018 22:21:53 CEST Kees Cook wrote:

...
That means scsi_sense_cache should be 96 bytes in size? But a 22 byte
read starting at offset 94 happened? That seems like a 20 byte read
beyond the end of the SLUB object? Though if it were reading past the
actual end of the object, I'd expect the hardened usercopy BUG (rather
than the WARN) to kick in. Ah, it looks like
/sys/kernel/slab/scsi_sense_cache/slab_size shows this to be 128 bytes
of actual allocation, so the 20 bytes doesn't strictly overlap another
object (hence no BUG):
...


Actually, I can trigger a BUG too:

[  129.259213] usercopy: Kernel memory exposure attempt detected from 
SLUB

object 'scsi_sense_cache' (offset 119, size 22)!
[  129.265167] [ cut here ]
[  129.267579] kernel BUG at mm/usercopy.c:100!

And also offset can be different, as you may see:

[   55.993224] Bad or missing usercopy whitelist? Kernel memory exposure
attempt detected from SLUB object 'scsi_sense_cache' (offset 76, size 
22)!
[   55.998678] WARNING: CPU: 0 PID: 1305 at mm/usercopy.c:81 
usercopy_warn

+0x7e/0xa0

It looks like only the size stays the same.


Can you send me your .config? What SCSI drivers are you using in the
VM and on the real server?


This is an Arch kernel with a config available here [1].

For both server and VM "lspci -vv" shows "ahci" in use. Is this what you 
are

asking for?


Are you able to see what ioctl()s smartctl is issuing? I'll try to
reproduce this on my end...


As per [2], strace shows "SG_IO" requests. Is this detailed enough?

Thanks for looking into it.

Regards,
  Oleksandr

[1] https://git.archlinux.org/svntogit/packages.git/plain/trunk/config?
h=packages/linux=d7625be23f83416491d202d5cea96e5a871fb216
[2] https://gist.github.com/6f58f8891468aeba1ab2cc9f45668735


Re: usercopy whitelist woe in scsi_sense_cache

2018-04-04 Thread Douglas Gilbert

On 2018-04-04 04:32 PM, Kees Cook wrote:

On Wed, Apr 4, 2018 at 12:07 PM, Oleksandr Natalenko
 wrote:

[  261.262135] Bad or missing usercopy whitelist? Kernel memory exposure
attempt detected from SLUB object 'scsi_sense_cache' (offset 94, size 22)!
I can easily reproduce it with a qemu VM and 2 virtual SCSI disks by calling
smartctl in a loop and doing some usual background I/O. The warning is
triggered within 3 minutes or so (not instantly).


Also:

Can you send me your .config? What SCSI drivers are you using in the
VM and on the real server?

Are you able to see what ioctl()s smartctl is issuing? I'll try to
reproduce this on my end...


smartctl -r scsiioctl,3 


Re: usercopy whitelist woe in scsi_sense_cache

2018-04-04 Thread Douglas Gilbert

On 2018-04-04 04:21 PM, Kees Cook wrote:

On Wed, Apr 4, 2018 at 12:07 PM, Oleksandr Natalenko
 wrote:

With v4.16 I get the following dump while using smartctl:
[...]
[  261.262135] Bad or missing usercopy whitelist? Kernel memory exposure
attempt detected from SLUB object 'scsi_sense_cache' (offset 94, size 22)!
[...]
[  261.345976] Call Trace:
[  261.350620]  __check_object_size+0x130/0x1a0
[  261.355775]  sg_io+0x269/0x3f0
[  261.360729]  ? path_lookupat+0xaa/0x1f0
[  261.364027]  ? current_time+0x18/0x70
[  261.366684]  scsi_cmd_ioctl+0x257/0x410
[  261.369871]  ? xfs_bmapi_read+0x1c3/0x340 [xfs]
[  261.372231]  sd_ioctl+0xbf/0x1a0 [sd_mod]
[  261.375456]  blkdev_ioctl+0x8ca/0x990
[  261.381156]  ? read_null+0x10/0x10
[  261.384984]  block_ioctl+0x39/0x40
[  261.388739]  do_vfs_ioctl+0xa4/0x630
[  261.392624]  ? vfs_write+0x164/0x1a0
[  261.396658]  SyS_ioctl+0x74/0x80
[  261.399563]  do_syscall_64+0x74/0x190
[  261.402685]  entry_SYSCALL_64_after_hwframe+0x3d/0xa2


This is:

sg_io+0x269/0x3f0:
blk_complete_sghdr_rq at block/scsi_ioctl.c:280
  (inlined by) sg_io at block/scsi_ioctl.c:376

which is:

 if (req->sense_len && hdr->sbp) {
 int len = min((unsigned int) hdr->mx_sb_len, req->sense_len);

 if (!copy_to_user(hdr->sbp, req->sense, len))
 hdr->sb_len_wr = len;
 else
 ret = -EFAULT;
 }


[...]
I can easily reproduce it with a qemu VM and 2 virtual SCSI disks by calling
smartctl in a loop and doing some usual background I/O. The warning is
triggered within 3 minutes or so (not instantly).

Initially, it was produced on my server after a kernel update (because disks
are monitored with smartctl via Zabbix).

Looks like the thing was introduced with
0afe76e88c57d91ef5697720aed380a339e3df70.

Any idea how to deal with this please? If needed, I can provide any additional
info, and also I'm happy/ready to test any proposed patches.


Interesting, and a little confusing. So, what's strange here is that
the scsi_sense_cache already has a full whitelist:

kmem_cache_create_usercopy("scsi_sense_cache",
SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN,
0, SCSI_SENSE_BUFFERSIZE, NULL);

Arg 2 is the buffer size, arg 5 is the whitelist offset (0), and the
whitelist size (same as arg2). In other words, the entire buffer
should be whitelisted.

include/scsi/scsi_cmnd.h says:

#define SCSI_SENSE_BUFFERSIZE  96

That means scsi_sense_cache should be 96 bytes in size? But a 22 byte
read starting at offset 94 happened? That seems like a 20 byte read
beyond the end of the SLUB object? Though if it were reading past the
actual end of the object, I'd expect the hardened usercopy BUG (rather
than the WARN) to kick in. Ah, it looks like
/sys/kernel/slab/scsi_sense_cache/slab_size shows this to be 128 bytes
of actual allocation, so the 20 bytes doesn't strictly overlap another
object (hence no BUG):

/sys/kernel/slab/scsi_sense_cache# grep . object_size usersize slab_size
object_size:96
usersize:96
slab_size:128

Ah, right, due to SLAB_HWCACHE_ALIGN, the allocation is rounded up to
the next cache line size, so there's 32 bytes of padding to reach 128.

James or Martin, is this over-read "expected" behavior? i.e. does the
sense cache buffer usage ever pull the ugly trick of silently
expanding its allocation into the space the slab allocator has given
it? If not, this looks like a real bug.

What I don't see is how req->sense is _not_ at offset 0 in the
scsi_sense_cache object...


Looking at the smartctl SCSI code it pulls 32 byte sense buffers.
Can't see 22 anywhere relevant in its code.

There are two types of sense: fixed and descriptor: with fixed you
seldom need more than 18 bytes (but it can only represent 32 bit
LBAs). The other type has a header and 0 or more variable length
descriptors. If decoding of descriptor sense went wrong you might
end up at offset 94. But not with smartctl 

Doug Gilbert


Re: usercopy whitelist woe in scsi_sense_cache

2018-04-04 Thread Kees Cook
On Wed, Apr 4, 2018 at 12:07 PM, Oleksandr Natalenko
 wrote:
> [  261.262135] Bad or missing usercopy whitelist? Kernel memory exposure
> attempt detected from SLUB object 'scsi_sense_cache' (offset 94, size 22)!
> I can easily reproduce it with a qemu VM and 2 virtual SCSI disks by calling
> smartctl in a loop and doing some usual background I/O. The warning is
> triggered within 3 minutes or so (not instantly).

Also:

Can you send me your .config? What SCSI drivers are you using in the
VM and on the real server?

Are you able to see what ioctl()s smartctl is issuing? I'll try to
reproduce this on my end...

-Kees

-- 
Kees Cook
Pixel Security


Re: usercopy whitelist woe in scsi_sense_cache

2018-04-04 Thread Kees Cook
On Wed, Apr 4, 2018 at 12:07 PM, Oleksandr Natalenko
 wrote:
> With v4.16 I get the following dump while using smartctl:
> [...]
> [  261.262135] Bad or missing usercopy whitelist? Kernel memory exposure
> attempt detected from SLUB object 'scsi_sense_cache' (offset 94, size 22)!
> [...]
> [  261.345976] Call Trace:
> [  261.350620]  __check_object_size+0x130/0x1a0
> [  261.355775]  sg_io+0x269/0x3f0
> [  261.360729]  ? path_lookupat+0xaa/0x1f0
> [  261.364027]  ? current_time+0x18/0x70
> [  261.366684]  scsi_cmd_ioctl+0x257/0x410
> [  261.369871]  ? xfs_bmapi_read+0x1c3/0x340 [xfs]
> [  261.372231]  sd_ioctl+0xbf/0x1a0 [sd_mod]
> [  261.375456]  blkdev_ioctl+0x8ca/0x990
> [  261.381156]  ? read_null+0x10/0x10
> [  261.384984]  block_ioctl+0x39/0x40
> [  261.388739]  do_vfs_ioctl+0xa4/0x630
> [  261.392624]  ? vfs_write+0x164/0x1a0
> [  261.396658]  SyS_ioctl+0x74/0x80
> [  261.399563]  do_syscall_64+0x74/0x190
> [  261.402685]  entry_SYSCALL_64_after_hwframe+0x3d/0xa2

This is:

sg_io+0x269/0x3f0:
blk_complete_sghdr_rq at block/scsi_ioctl.c:280
 (inlined by) sg_io at block/scsi_ioctl.c:376

which is:

if (req->sense_len && hdr->sbp) {
int len = min((unsigned int) hdr->mx_sb_len, req->sense_len);

if (!copy_to_user(hdr->sbp, req->sense, len))
hdr->sb_len_wr = len;
else
ret = -EFAULT;
}

> [...]
> I can easily reproduce it with a qemu VM and 2 virtual SCSI disks by calling
> smartctl in a loop and doing some usual background I/O. The warning is
> triggered within 3 minutes or so (not instantly).
>
> Initially, it was produced on my server after a kernel update (because disks
> are monitored with smartctl via Zabbix).
>
> Looks like the thing was introduced with
> 0afe76e88c57d91ef5697720aed380a339e3df70.
>
> Any idea how to deal with this please? If needed, I can provide any additional
> info, and also I'm happy/ready to test any proposed patches.

Interesting, and a little confusing. So, what's strange here is that
the scsi_sense_cache already has a full whitelist:

   kmem_cache_create_usercopy("scsi_sense_cache",
   SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN,
   0, SCSI_SENSE_BUFFERSIZE, NULL);

Arg 2 is the buffer size, arg 5 is the whitelist offset (0), and the
whitelist size (same as arg2). In other words, the entire buffer
should be whitelisted.

include/scsi/scsi_cmnd.h says:

#define SCSI_SENSE_BUFFERSIZE  96

That means scsi_sense_cache should be 96 bytes in size? But a 22 byte
read starting at offset 94 happened? That seems like a 20 byte read
beyond the end of the SLUB object? Though if it were reading past the
actual end of the object, I'd expect the hardened usercopy BUG (rather
than the WARN) to kick in. Ah, it looks like
/sys/kernel/slab/scsi_sense_cache/slab_size shows this to be 128 bytes
of actual allocation, so the 20 bytes doesn't strictly overlap another
object (hence no BUG):

/sys/kernel/slab/scsi_sense_cache# grep . object_size usersize slab_size
object_size:96
usersize:96
slab_size:128

Ah, right, due to SLAB_HWCACHE_ALIGN, the allocation is rounded up to
the next cache line size, so there's 32 bytes of padding to reach 128.

James or Martin, is this over-read "expected" behavior? i.e. does the
sense cache buffer usage ever pull the ugly trick of silently
expanding its allocation into the space the slab allocator has given
it? If not, this looks like a real bug.

What I don't see is how req->sense is _not_ at offset 0 in the
scsi_sense_cache object...

-Kees

-- 
Kees Cook
Pixel Security


Re: [PATCH v2] Fix DID_OK handling in __scsi_error_from_host_byte()

2018-04-04 Thread Lee Duncan
On 04/04/2018 10:53 AM, Bart Van Assche wrote:
> Commit e39a97353e53 modified __scsi_error_from_host_byte() such
> that that function translates DID_OK into BLK_STS_OK. However,
> the description of that commit is wrong: it mentions that commit
> 2a842acab109 introduced a bug in __scsi_error_from_host_byte()
> although that commit did not change the behavior of that function.
> Additionally, commit e39a97353e53 introduced a severe bug: it causes
> commands that fail with hostbyte=DID_OK and driverbyte=DRIVER_SENSE
> to be completed with BLK_STS_OK. Fix __scsi_error_from_host_byte()
> by only translating good status values into BLK_STS_OK.
> 
> Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in 
> __scsi_error_from_host_byte()")
> Reported-by: Damien Le Moal 
> Signed-off-by: Bart Van Assche 
> Cc: Hannes Reinecke 
> Cc: Douglas Gilbert 
> Cc: Damien Le Moal 
> Cc: Christoph Hellwig 
> Cc: sta...@vger.kernel.org
> ---
> 
> Changes compared to v1:
> - Modified __scsi_error_from_host_byte() such that it again returns
>   BLK_STS_OK for CONDITION MET and other result codes that represent
>   success.
> 
>  drivers/scsi/scsi_lib.c | 8 +++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index 74a39db57d49..1496b34af409 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -736,7 +736,13 @@ static blk_status_t __scsi_error_from_host_byte(struct 
> scsi_cmnd *cmd,
>  {
>   switch (host_byte(result)) {
>   case DID_OK:
> - return BLK_STS_OK;
> + /*
> +  * Also check the other bytes than the status byte in result
> +  * to handle the case when a SCSI LLD sets result to
> +  * DRIVER_SENSE << 24 without setting SAM_STAT_CHECK_CONDITION.
> +  */
> + return scsi_status_is_good(result) && (result & ~0xff) == 0 ?
> + BLK_STS_OK : BLK_STS_IOERR;
>   case DID_TRANSPORT_FAILFAST:
>   return BLK_STS_TRANSPORT;
>   case DID_TARGET_FAILURE:
> 

Reviewed-by: Lee Duncan 


[PATCH] target: change default dbroot to /etc/target

2018-04-04 Thread Lee Duncan
The dbroot (target PR database root directory) is
configurable but default to /var/target, a historic
value. But the reason for adding configurability
was to move the target directory out of /var. This
is because the File Hierarchy Standard v3.0 mandates
that this "target" directory not be in /var. See
https://refspecs.linuxfoundation.org/FHS_3.0/fhs-3.0.pdf

This change moves the default from /var/target to
/etc/target, but this value is still configurable,
so those wishing to continue to use /var/target
can still do so.

Signed-off-by: Lee Duncan 
---
 drivers/target/target_core_internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/target/target_core_internal.h 
b/drivers/target/target_core_internal.h
index 1d5afc3ae017..34eccef975b7 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -165,7 +165,7 @@ extern struct se_portal_group xcopy_pt_tpg;
 
 /* target_core_configfs.c */
 #define DB_ROOT_LEN4096
-#defineDB_ROOT_DEFAULT "/var/target"
+#defineDB_ROOT_DEFAULT "/etc/target"
 
 extern char db_root[];
 
-- 
2.13.6



usercopy whitelist woe in scsi_sense_cache

2018-04-04 Thread Oleksandr Natalenko
Hi, Kees, David et al.

With v4.16 I get the following dump while using smartctl:

===
[  261.260617] [ cut here ]
[  261.262135] Bad or missing usercopy whitelist? Kernel memory exposure 
attempt detected from SLUB object 'scsi_sense_cache' (offset 94, size 22)!
[  261.267672] WARNING: CPU: 2 PID: 27041 at mm/usercopy.c:81 usercopy_warn
+0x7e/0xa0
[  261.273624] Modules linked in: nls_iso8859_1 nls_cp437 vfat fat kvm_intel 
kvm iTCO_wdt ppdev irqbypass bochs_drm ttm iTCO_vendor_support drm_kms_helper 
drm psmouse input_leds led_class pcspkr joydev intel_agp parport_pc mousedev 
evdev syscopyarea intel_gtt i2c_i801 sysfillrect parport rtc_cmos sysimgblt 
qemu_fw_cfg mac_hid agpgart fb_sys_fops lpc_ich ip_tables x_tables xfs 
dm_thin_pool dm_persistent_data dm_bio_prison dm_bufio libcrc32c 
crc32c_generic dm_crypt algif_skcipher af_alg dm_mod raid10 md_mod hid_generic 
usbhid hid sr_mod cdrom sd_mod crct10dif_pclmul uhci_hcd crc32_pclmul 
crc32c_intel ghash_clmulni_intel pcbc serio_raw ahci atkbd aesni_intel 
xhci_pci aes_x86_64 ehci_pci libahci crypto_simd libps2 glue_helper xhci_hcd 
ehci_hcd libata cryptd usbcore usb_common i8042 serio virtio_scsi scsi_mod
[  261.300752]  virtio_blk virtio_net virtio_pci virtio_ring virtio
[  261.305534] CPU: 2 PID: 27041 Comm: smartctl Not tainted 4.16.0-1-ARCH #1
[  261.309936] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 
02/06/2015
[  261.313668] RIP: 0010:usercopy_warn+0x7e/0xa0
[  261.315653] RSP: 0018:ab5aca6cfc40 EFLAGS: 00010286
[  261.320038] RAX:  RBX: 8e8cd893605e RCX: 
0001
[  261.322215] RDX: 8001 RSI: 83eb4672 RDI: 

[  261.325680] RBP: 0016 R08:  R09: 
0282
[  261.328462] R10: 83e896b1 R11: 0001 R12: 
0001
[  261.330584] R13: 8e8cd8936074 R14: 8e8cd893605e R15: 
0016
[  261.332748] FS:  7f5a81bdf040() GS:8e8cdf70() knlGS:

[  261.337929] CS:  0010 DS:  ES:  CR0: 80050033
[  261.343128] CR2: 7fff3a6790a8 CR3: 18228006 CR4: 
00160ee0
[  261.345976] Call Trace:
[  261.350620]  __check_object_size+0x130/0x1a0
[  261.355775]  sg_io+0x269/0x3f0
[  261.360729]  ? path_lookupat+0xaa/0x1f0
[  261.364027]  ? current_time+0x18/0x70
[  261.366684]  scsi_cmd_ioctl+0x257/0x410
[  261.369871]  ? xfs_bmapi_read+0x1c3/0x340 [xfs]
[  261.372231]  sd_ioctl+0xbf/0x1a0 [sd_mod]
[  261.375456]  blkdev_ioctl+0x8ca/0x990
[  261.381156]  ? read_null+0x10/0x10
[  261.384984]  block_ioctl+0x39/0x40
[  261.388739]  do_vfs_ioctl+0xa4/0x630
[  261.392624]  ? vfs_write+0x164/0x1a0
[  261.396658]  SyS_ioctl+0x74/0x80
[  261.399563]  do_syscall_64+0x74/0x190
[  261.402685]  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
[  261.414154] RIP: 0033:0x7f5a8115ed87
[  261.417184] RSP: 002b:7fff3a65a458 EFLAGS: 0246 ORIG_RAX: 
0010
[  261.427362] RAX: ffda RBX: 7fff3a65a700 RCX: 
7f5a8115ed87
[  261.432075] RDX: 7fff3a65a470 RSI: 2285 RDI: 
0003
[  261.436200] RBP: 7fff3a65a750 R08: 0010 R09: 

[  261.446689] R10:  R11: 0246 R12: 
55b5481d9ce0
[  261.450059] R13:  R14: 55b5481d3550 R15: 
00da
[  261.455103] Code: 48 c7 c0 f1 af e5 83 48 0f 44 c2 41 50 51 41 51 48 89 f9 
49 89 f1 4d 89 d8 4c 89 d2 48 89 c6 48 c7 c7 48 b0 e5 83 e8 32 a7 e3 ff <0f> 
0b 48 83 c4 18 c3 48 c7 c6 44 0d e5 83 49 89 f1 49 89 f3 eb 
[  261.467988] ---[ end trace 75034b3832c364e4 ]---
===

I can easily reproduce it with a qemu VM and 2 virtual SCSI disks by calling 
smartctl in a loop and doing some usual background I/O. The warning is 
triggered within 3 minutes or so (not instantly).

Initially, it was produced on my server after a kernel update (because disks 
are monitored with smartctl via Zabbix).

Looks like the thing was introduced with 
0afe76e88c57d91ef5697720aed380a339e3df70.

Any idea how to deal with this please? If needed, I can provide any additional 
info, and also I'm happy/ready to test any proposed patches.

Thanks.

Regards,
  Oleksandr




Re: [PATCH v2] Fix DID_OK handling in __scsi_error_from_host_byte()

2018-04-04 Thread Douglas Gilbert

On 2018-04-04 01:53 PM, Bart Van Assche wrote:

Commit e39a97353e53 modified __scsi_error_from_host_byte() such
that that function translates DID_OK into BLK_STS_OK. However,
the description of that commit is wrong: it mentions that commit
2a842acab109 introduced a bug in __scsi_error_from_host_byte()
although that commit did not change the behavior of that function.
Additionally, commit e39a97353e53 introduced a severe bug: it causes
commands that fail with hostbyte=DID_OK and driverbyte=DRIVER_SENSE
to be completed with BLK_STS_OK. Fix __scsi_error_from_host_byte()
by only translating good status values into BLK_STS_OK.

Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in 
__scsi_error_from_host_byte()")
Reported-by: Damien Le Moal 
Signed-off-by: Bart Van Assche 
Cc: Hannes Reinecke 
Cc: Douglas Gilbert 


Reviewed-by: Douglas Gilbert 


Cc: Damien Le Moal 
Cc: Christoph Hellwig 
Cc: sta...@vger.kernel.org
---


There is some urgency here as the faulty patch is in lk 4.16.0
release.

My patch was prepared independently of seeing this one. Both
do the job (mine has a redundant status_byte(result) in the
conditional). And the function name is now completely misleading
but due to the urgency, that can wait for another day.


Changes compared to v1:
- Modified __scsi_error_from_host_byte() such that it again returns
   BLK_STS_OK for CONDITION MET and other result codes that represent
   success.

  drivers/scsi/scsi_lib.c | 8 +++-
  1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 74a39db57d49..1496b34af409 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -736,7 +736,13 @@ static blk_status_t __scsi_error_from_host_byte(struct 
scsi_cmnd *cmd,
  {
switch (host_byte(result)) {
case DID_OK:
-   return BLK_STS_OK;
+   /*
+* Also check the other bytes than the status byte in result
+* to handle the case when a SCSI LLD sets result to
+* DRIVER_SENSE << 24 without setting SAM_STAT_CHECK_CONDITION.
+*/
+   return scsi_status_is_good(result) && (result & ~0xff) == 0 ?
+   BLK_STS_OK : BLK_STS_IOERR;
case DID_TRANSPORT_FAILFAST:
return BLK_STS_TRANSPORT;
case DID_TARGET_FAILURE:





Re: [PATCH] scsi: fix __scsi_error_from_host_byte() breakage

2018-04-04 Thread Bart Van Assche
On Wed, 2018-04-04 at 14:14 -0400, Douglas Gilbert wrote:
> Patch e39a97353e53 titled "scsi: core: return BLK_STS_OK for DID_OK in 
> __scsi_error_from_host_byte()" attempted to make that function return
> BLK_STS_OK when it was given host_byte(result)==DID_OK. While that seems
> sensible, it failed to take into account that there may be errors present
> in the driver_byte and the status byte. Add those checks and expand
> description of function accordingly.

Hello Doug,

Please clarify why you posted a new patch instead of reviewing "[PATCH v2]
Fix DID_OK handling in __scsi_error_from_host_byte()". Did you perhaps miss
that patch?

Thanks,

Bart.





[PATCH] scsi: fix __scsi_error_from_host_byte() breakage

2018-04-04 Thread Douglas Gilbert
Patch e39a97353e53 titled "scsi: core: return BLK_STS_OK for DID_OK in 
__scsi_error_from_host_byte()" attempted to make that function return
BLK_STS_OK when it was given host_byte(result)==DID_OK. While that seems
sensible, it failed to take into account that there may be errors present
in the driver_byte and the status byte. Add those checks and expand
description of function accordingly.

Signed-off-by: Douglas Gilbert 
---
The function would be better called blk_status_from_scsi_result(). Is
the leading "__" needed?

 drivers/scsi/scsi_lib.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index aaf485e36450..6a8bf312d82f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -727,13 +727,18 @@ static bool scsi_end_request(struct request *req, 
blk_status_t error,
  * @cmd:   SCSI command (unused)
  * @result:scsi error code
  *
- * Translate SCSI error code into block errors.
+ * Translate SCSI error code into block errors. In the case of result==DID_OK
+ * also check driver and status bytes for errors.
  */
 static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd,
int result)
 {
switch (host_byte(result)) {
case DID_OK:
+   if (driver_byte(result) != DRIVER_OK)
+   return BLK_STS_IOERR;
+   else if (status_byte(result) && !scsi_status_is_good(result))
+   return BLK_STS_IOERR;  /* e.g. reservation conflict */
return BLK_STS_OK;
case DID_TRANSPORT_FAILFAST:
return BLK_STS_TRANSPORT;
-- 
2.14.1



[PATCH v2] Fix DID_OK handling in __scsi_error_from_host_byte()

2018-04-04 Thread Bart Van Assche
Commit e39a97353e53 modified __scsi_error_from_host_byte() such
that that function translates DID_OK into BLK_STS_OK. However,
the description of that commit is wrong: it mentions that commit
2a842acab109 introduced a bug in __scsi_error_from_host_byte()
although that commit did not change the behavior of that function.
Additionally, commit e39a97353e53 introduced a severe bug: it causes
commands that fail with hostbyte=DID_OK and driverbyte=DRIVER_SENSE
to be completed with BLK_STS_OK. Fix __scsi_error_from_host_byte()
by only translating good status values into BLK_STS_OK.

Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in 
__scsi_error_from_host_byte()")
Reported-by: Damien Le Moal 
Signed-off-by: Bart Van Assche 
Cc: Hannes Reinecke 
Cc: Douglas Gilbert 
Cc: Damien Le Moal 
Cc: Christoph Hellwig 
Cc: sta...@vger.kernel.org
---

Changes compared to v1:
- Modified __scsi_error_from_host_byte() such that it again returns
  BLK_STS_OK for CONDITION MET and other result codes that represent
  success.

 drivers/scsi/scsi_lib.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 74a39db57d49..1496b34af409 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -736,7 +736,13 @@ static blk_status_t __scsi_error_from_host_byte(struct 
scsi_cmnd *cmd,
 {
switch (host_byte(result)) {
case DID_OK:
-   return BLK_STS_OK;
+   /*
+* Also check the other bytes than the status byte in result
+* to handle the case when a SCSI LLD sets result to
+* DRIVER_SENSE << 24 without setting SAM_STAT_CHECK_CONDITION.
+*/
+   return scsi_status_is_good(result) && (result & ~0xff) == 0 ?
+   BLK_STS_OK : BLK_STS_IOERR;
case DID_TRANSPORT_FAILFAST:
return BLK_STS_TRANSPORT;
case DID_TARGET_FAILURE:
-- 
2.16.2



Re: [PATCH] Revert "scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"

2018-04-04 Thread Douglas Gilbert

On 2018-04-04 12:30 PM, Bart Van Assche wrote:

The description of commit e39a97353e53 is wrong: it mentions that
commit 2a842acab109 introduced a bug in __scsi_error_from_host_byte()
although that commit did not change the behavior of that function.
Additionally, that commit introduced a severe bug: it causes commands
that fail with hostbyte=DID_OK and driverbyte=DRIVER_SENSE to be
completed with BLK_STS_OK. Although that commit claims to fix a bug
it does not mention which bug it fixes. Hence revert that commit.


That patch arose from the thread started by me titled:
   [PATCH v2] Make SCSI Status CONDITION MET equivalent to GOOD
on 26 February this year. Hannes Reinecke made the case for
reducing the helper scsi_io_completion_nz_result() to a single
returned value (an idea that was applied and later reverted).
He wrote:

> Hmm. Can't we return blk_stat from this function, and adjusting the
> 'result' value after it with an if-clause like
>
> if (blk_stat == BLK_STS_OK)
>result = 0;
>
> That would cleanup up the function and avoid having (essentially) two
> return values.
>
> The only problem here is that __scsi_error_from_hostbyte() will return
> BLK_STS_IOERR if result == 0; doubt that is intended.
> And I guess it'll affect this issue, too.
>
> Mind sending a separate patch for that?

Actually Hannes had already sent a patch for that before I had time to
react (on the same day). That is the patch you now plan to revert.


In Hannes' defence (and I reviewed it) it is counter intuitive that for
callers to work properly, this function supplied with DID_OK in the
result argument should return BLK_STS_IOERR.

Please find out why that is so and craft a comment so nobody else falls
down this particular rabbit hole. Plus add 'case DID_OK:' before the
'default:' to emphasis the point.

Doug Gilbert


Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in 
__scsi_error_from_host_byte()")
Reported-by: Damien Le Moal 
Signed-off-by: Bart Van Assche 
Cc: Hannes Reinecke 
Cc: Douglas Gilbert 
Cc: Damien Le Moal 
Cc: Christoph Hellwig 
Cc: sta...@vger.kernel.org
---
  drivers/scsi/scsi_lib.c | 2 --
  1 file changed, 2 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 74a39db57d49..71f5b010684c 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -735,8 +735,6 @@ static blk_status_t __scsi_error_from_host_byte(struct 
scsi_cmnd *cmd,
int result)
  {
switch (host_byte(result)) {
-   case DID_OK:
-   return BLK_STS_OK;
case DID_TRANSPORT_FAILFAST:
return BLK_STS_TRANSPORT;
case DID_TARGET_FAILURE:





[PATCH] Revert "scsi: core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"

2018-04-04 Thread Bart Van Assche
The description of commit e39a97353e53 is wrong: it mentions that
commit 2a842acab109 introduced a bug in __scsi_error_from_host_byte()
although that commit did not change the behavior of that function.
Additionally, that commit introduced a severe bug: it causes commands
that fail with hostbyte=DID_OK and driverbyte=DRIVER_SENSE to be
completed with BLK_STS_OK. Although that commit claims to fix a bug
it does not mention which bug it fixes. Hence revert that commit.

Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in 
__scsi_error_from_host_byte()")
Reported-by: Damien Le Moal 
Signed-off-by: Bart Van Assche 
Cc: Hannes Reinecke 
Cc: Douglas Gilbert 
Cc: Damien Le Moal 
Cc: Christoph Hellwig 
Cc: sta...@vger.kernel.org
---
 drivers/scsi/scsi_lib.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 74a39db57d49..71f5b010684c 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -735,8 +735,6 @@ static blk_status_t __scsi_error_from_host_byte(struct 
scsi_cmnd *cmd,
int result)
 {
switch (host_byte(result)) {
-   case DID_OK:
-   return BLK_STS_OK;
case DID_TRANSPORT_FAILFAST:
return BLK_STS_TRANSPORT;
case DID_TARGET_FAILURE:
-- 
2.16.2



[no subject]

2018-04-04 Thread системы администратор


пользователь веб-почты

Обратите внимание, что 95% ваших писем, полученных после обновления сервера 
веб-почты в последнее время в нашей базе данных, были отложены. Регулярно 
получать и отправлять свои сообщения. Техническая команда нашей веб-почты 
обновит вашу учетную запись в течение 3 рабочих дней. Если вы этого не 
сделаете, ваша учетная запись будет временно приостановлена нашими службами. 
Чтобы повторно подтвердить свой почтовый ящик, пришлите следующую информацию:

обычный:
Имя пользователя:
пароль:
Подтвердите Пароль:

Предупреждение!! Если это не позволит обновлять учетные записи в
течение двух дней после получения электронной почты, они будут
постоянно потерять учетные записи владельцев учетных записей
веб-почты.

Спасибо за ваше сотрудничество!

Copyright  2017-2018 Служба технической поддержки WebMail, Inc.


Re: [PATCHv2] scsi: Fix failed request error code

2018-04-04 Thread Douglas Gilbert

On 2018-04-04 03:27 AM, Hannes Reinecke wrote:

With the introduction of commit e39a97353e53 ("scsi: core: return
BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"), a command that
failed with hostbyte=DID_OK and driverbyte=DRIVER_SENSE but lacking
additional sense information will have a return code set to BLK_STS_OK.
This results in the request issuer to see successful request execution
despite the failure. An example of such case is an unaligned write on a
host managed ZAC disk connected to a SAS HBA with a malfunctioning SAT.
The unaligned write command gets aborted but has no additional sense
information.

sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK 
driverbyte=DRIVER_SENSE
sd 10:0:0:0: [sde] tag#3905 Sense Key : Aborted Command [current]
sd 10:0:0:0: [sde] tag#3905 Add. Sense: No additional sense information
sd 10:0:0:0: [sde] tag#3905 CDB: Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 
00 01 00 00
print_req_error: I/O error, dev sde, sector 274726920

In scsi_io_completion(), sense key handling to not change the request
error code and success being reported to the issuer.

Fix this by making sure that the error code always indicates an error
if scsi_io_completion() decide that the action to be taken for a failed
command is to not retry it and terminate it immediately (ACTION_FAIL) .

Signed-off-by: Damien Le Moal 
Signed-off-by: Hannes Reinecke 
Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in 
__scsi_error_from_host_byte()")
Cc: 
---
  drivers/scsi/scsi_lib.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 393f9db8f41b..9389c41e2829 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -905,6 +905,12 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned 
int good_bytes)
goto requeue;
  
  	error = __scsi_error_from_host_byte(cmd, result);

+   /*
+* If the hostbyte was DID_OK, but the sense code is valid
+* we always should set BLK_STS_IOERR.
+*/
+   if (error == BLK_STS_OK && sense_valid)


&& !sense_deferred

Doug Gilbert


+   error = BLK_STS_IOERR;
  
  	if (host_byte(result) == DID_RESET) {

/* Third party bus reset or reset for error recovery





Re: [PATCHv2] scsi: Fix failed request error code

2018-04-04 Thread Bart Van Assche
On Wed, 2018-04-04 at 09:27 +0200, Hannes Reinecke wrote:
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index 393f9db8f41b..9389c41e2829 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -905,6 +905,12 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned 
> int good_bytes)
>   goto requeue;
>  
>   error = __scsi_error_from_host_byte(cmd, result);
> + /*
> +  * If the hostbyte was DID_OK, but the sense code is valid
> +  * we always should set BLK_STS_IOERR.
> +  */
> + if (error == BLK_STS_OK && sense_valid)
> + error = BLK_STS_IOERR;
>  
>   if (host_byte(result) == DID_RESET) {
>   /* Third party bus reset or reset for error recovery

__scsi_error_from_host_byte() has two callers. Why does this patch only
update one of these two callers?

Regarding commit e39a97353e53, the description of that commit is as follows:
"When converting __scsi_error_from_host_byte() to BLK_STS error codes the
case DID_OK was forgotten, resulting in it always returning an error."
However, the comment above that function reads as follows: "translate SCSI
error code into errno". If I have a look at the v4.12 SCSI core (before the
blkstatus_t conversion) then I see that __scsi_error_from_host_byte() never
returns 0. That means that the description of commit e39a97353e53 ("scsi:
core: return BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()") is
wrong. Does that mean that commit e39a97353e53 should be reverted?

Thanks,

Bart.




Re: [PATCH 2/2] sd_zbc: Avoid errors due to sd_zbc_setup() execution

2018-04-04 Thread Bart Van Assche
On Wed, 2018-04-04 at 17:54 +0900, Damien Le Moal wrote:
> Since SCSI scanning occurs asynchronously, since sd_revalidate_disk()
> is called from sd_probe_async() and since sd_revalidate_disk() calls
> sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called
> concurrently with operations referencing a drive zone bitmaps and number
   

Should "a" be changed into "the"?

> [Damien] Updated commit message and changed nr_zones/bitmap swap order.

Updating the number of zones after having updated the bitmap pointers is not
sufficient to avoid trouble if the number of zones as reported by the drive
changes while I/O is in progress. With the current implementation if the
number of zones changes the seq_zones_bitmap is cleared. Can this cause
trouble for the mq-deadline scheduler? Additionally, CPUs other than x86 can
reorder store operations. Even worse, a CPU could cache the zone bitmap
pointers which means that at least RCU protection + kfree_rcu() is needed to
avoid trouble. I think we either should handle this case properly or issue a
kernel warning.

Thanks,

Bart.




Re: [PATCH 1/2] sd_zbc: Avoid errors due to sd_zbc_check_zone_size() execution

2018-04-04 Thread Bart Van Assche

On 04/04/18 01:54, Damien Le Moal wrote:

  static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
  {
+   u64 sdkp_zone_blocks = sdkp->zone_blocks;


Shouldn't this variable be initialized to zero such that zone size 
changes are accepted even if the SAME field in the REPORT ZONES response 
is zero?


Thanks,

Bart.


RE: [PATCH] aacraid: Insure command thread is not recursively stopped

2018-04-04 Thread Raghava Aditya Renukunta


> -Original Message-
> From: Dave Carroll [mailto:david.carr...@microsemi.com]
> Sent: Wednesday, April 4, 2018 3:21 AM
> To: Martin K . Petersen ; James Bottomley
> 
> Cc: Dave Carroll ; linux-scsi  s...@vger.kernel.org>; dl-esc-Aacraid Linux Driver
> ; Scott Benesh 
> Subject: [PATCH] aacraid: Insure command thread is not recursively stopped
> 
> If a recursive IOP_RESET is invoked, usually due to the eh_thread handling
> errors after the first reset, be sure we flag that the command thread has
> been stopped to avoid an Oops of the form;
> 
>  [ 336.620256] CPU: 28 PID: 1193 Comm: scsi_eh_0 Kdump: loaded Not
> tainted 4.14.0-49.el7a.ppc64le #1
>  [ 336.620297] task: c03fd630b800 task.stack: c03fd61a4000
>  [ 336.620326] NIP: c0176794 LR: c013038c CTR:
> c024bc10
>  [ 336.620361] REGS: c03fd61a7720 TRAP: 0300 Not tainted (4.14.0-
> 49.el7a.ppc64le)
>  [ 336.620395] MSR: 90009033  CR:
> 22084022 XER: 2004
>  [ 336.620435] CFAR: c0130388 DAR:  DSISR:
> 4000 SOFTE: 1
>  [ 336.620435] GPR00: c013038c c03fd61a79a0 c14c7e00
> 
>  [ 336.620435] GPR04: 000c 000c
> 90009033 0477
>  [ 336.620435] GPR08: 0477 
>  c00810f7d940
>  [ 336.620435] GPR12: c024bc10 c7a33400
> c01708a8 c03fe3b881d8
>  [ 336.620435] GPR16: c03fe3b88060 c03fd61a7d10 f000
> 001e
>  [ 336.620435] GPR20: 0001 c0ebf1a0
> 0001 c03fe3b88000
>  [ 336.620435] GPR24: 0003 0002
> c03fe3b88840 c03fe3b887e8
>  [ 336.620435] GPR28: c03fe3b88000 c03fc8181788 
> c03fc8181700
>  [ 336.620750] NIP [c0176794] exit_creds+0x34/0x160
>  [ 336.620775] LR [c013038c] __put_task_struct+0x8c/0x1f0
>  [ 336.620804] Call Trace:
>  [ 336.620817] [c03fd61a79a0] [c03fe3b88000] 0xc03fe3b88000
> (unreliable)
>  [ 336.620853] [c03fd61a79d0] [c013038c]
> __put_task_struct+0x8c/0x1f0
>  [ 336.620889] [c03fd61a7a00] [c0171418]
> kthread_stop+0x1e8/0x1f0
>  [ 336.620922] [c03fd61a7a40] [c00810f7448c]
> aac_reset_adapter+0x14c/0x8d0 [aacraid]
>  [ 336.620959] [c03fd61a7b00] [c00810f60174]
> aac_eh_host_reset+0x84/0x100 [aacraid]
>  [ 336.621010] [c03fd61a7b30] [c0864f24]
> scsi_try_host_reset+0x74/0x180
>  [ 336.621046] [c03fd61a7bb0] [c0867ac0]
> scsi_eh_ready_devs+0xc00/0x14d0
>  [ 336.625165] [c03fd61a7ca0] [c08699e0]
> scsi_error_handler+0x550/0x730
>  [ 336.632101] [c03fd61a7dc0] [c0170a08] kthread+0x168/0x1b0
>  [ 336.639031] [c03fd61a7e30] [c000b528]
> ret_from_kernel_thread+0x5c/0xb4
>  [ 336.645971] Instruction dump:
>  [ 336.648743] 384216a0 7c0802a6 fbe1fff8 f8010010 f821ffd1 7c7f1b78
> 6000 6000
>  [ 336.657056] 3940 e87f0838 f95f0838 7c0004ac <7d401828> 314a
> 7d40192d 40c2fff4
>  [ 336.663997] -[ end trace 4640cf8d4945ad95 ]-
> 
> So flag when the thread is stopped by setting the thread pointer to NULL.
> 
> Signed-off-by: Dave Carroll 
Reviewed-by: Raghava Aditya Renukunta 


Re: [PATCH 03/15] mpt3sas: Add sanity checks for scsi tracker before accessing it.

2018-04-04 Thread Sreekanth Reddy
On Tue, Apr 3, 2018 at 9:26 PM, Bart Van Assche  wrote:
> On Tue, 2018-04-03 at 10:11 +0530, Sreekanth Reddy wrote:
>> [SR] No, driver calls _scsih_flush_running_cmds during Host reset time
>> and during host reset time driver will set 'ioc->shost_recovery' flag.
>> So in the scsih_qcmd() driver will return the incoming SCSI cmds with
>> "SCSI_MLQUEUE_HOST_BUSY" whenever 'ioc->shost_recovery' flag is set as
>> shown below,
>>
>> /* host recovery or link resets sent via IOCTLs */
>> if (ioc->shost_recovery || ioc->ioc_link_reset_in_progress)
>> return SCSI_MLQUEUE_HOST_BUSY;
>
> The ioc->shost_recovery flag is involved in at least the following race:
> * From one context a SCSI command is submitted and scsih_qcmd() gets called.
> * At the same time sg_reset is invoked from a shell and triggers a call to
>   scsih_host_reset(). That function in turn will call
>   mpt3sas_base_hard_reset_handler().
>
> I think this scenario can cause ioc->shost_recovery to be set by the mpt3sas
> driver after it has been checked by the scsih_qcmd() function.

Then these outstanding commands will be get flush by the driver in
_scsih_flush_running_cmds() function which we call as a part of host
reset operation.

>
> Anyway, let's get back to patch 03/15 at the start of this e-mail thread.
> That patch looks to me like an incomplete attempt to work around a race
> condition in the mpt3sas driver. I don't expect that anyone will trust that
> patch without further explanation. Which race condition does that patch
> address? And why do the mpt3sas maintainers believe that this patch is
> sufficient to address that race condition?

Sure Bart, we will add proper description with the information which I
explained in this mail thread on how this patch will fix below issue,

During Host reset operation time driver will
flush out all the outstanding IO's to the SML in below function,

static void
_scsih_flush_running_cmds(struct MPT3SAS_ADAPTER *ioc)
{
struct scsi_cmnd *scmd;
struct scsiio_tracker *st;
u16 smid;
int count = 0;

[SR] Here driver is looping starting from smid one to HBA queue depth.
for (smid = 1; smid <= ioc->scsiio_depth; smid++) {

[SR] Some times it is possible that scsi_cmnd might have created at
SML but it might not be issued to the driver as host reset operation
is going on, So here we will get non-NULL scmd.
scmd = mpt3sas_scsih_scsi_lookup_get(ioc, smid);
if (!scmd)
continue;
count++;
_scsih_set_satl_pending(scmd, false);
[SR] Here we are trying to get the scsi tracker 'st' for the above
scmd (which is not received by the driver) and so fields of this 'st'
are uninitialized.
st = scsi_cmd_priv(scmd);
[SR] And here we are trying to clear the scsi tracker 'st' which is
not yet all initialized by the driver, in other terms we are trying to
flush out the scmd command which is not yet all received by the
driver.
mpt3sas_base_clear_st(ioc, st);
scsi_dma_unmap(scmd);
if (ioc->pci_error_recovery || ioc->remove_host)
scmd->result = DID_NO_CONNECT << 16;
else
scmd->result = DID_RESET << 16;
scmd->scsi_done(scmd);
}
dtmprintk(ioc, pr_info(MPT3SAS_FMT "completing %d cmds\n",
ioc->name, count));
}

>
> Thanks,
>
> Bart.
>


Re: [PATCH 1/1] scsi: mvsas:fix memory leak

2018-04-04 Thread kbuild test robot
Hi Xidong,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on mkp-scsi/for-next]
[also build test WARNING on v4.16 next-20180403]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Xidong-Wang/scsi-mvsas-fix-memory-leak/20180404-182132
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git for-next
config: x86_64-randconfig-x010-201813 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   drivers/scsi//mvsas/mv_init.c: In function 'mvs_pci_alloc':
>> drivers/scsi//mvsas/mv_init.c:373:2: warning: this 'if' clause does not 
>> guard... [-Wmisleading-indentation]
 if (!mvi)
 ^~
   drivers/scsi//mvsas/mv_init.c:375:3: note: ...this statement, but the latter 
is misleadingly indented as if it were guarded by the 'if'
  return NULL;
  ^~
   Cyclomatic Complexity 1 arch/x86/include/asm/bitops.h:test_and_set_bit
   Cyclomatic Complexity 1 arch/x86/include/asm/bitops.h:fls64
   Cyclomatic Complexity 1 include/linux/log2.h:__ilog2_u64
   Cyclomatic Complexity 1 include/linux/mem_encrypt.h:sme_get_me_mask
   Cyclomatic Complexity 1 include/asm-generic/getorder.h:__get_order
   Cyclomatic Complexity 1 arch/x86/include/asm/paravirt.h:arch_local_save_flags
   Cyclomatic Complexity 1 
arch/x86/include/asm/irqflags.h:arch_irqs_disabled_flags
   Cyclomatic Complexity 1 include/linux/device.h:dev_get_drvdata
   Cyclomatic Complexity 1 include/linux/device.h:dev_set_drvdata
   Cyclomatic Complexity 1 include/linux/dma-debug.h:debug_dma_free_coherent
   Cyclomatic Complexity 1 arch/x86/include/asm/dma-mapping.h:get_arch_dma_ops
   Cyclomatic Complexity 4 include/linux/dma-mapping.h:get_dma_ops
   Cyclomatic Complexity 1 include/linux/kasan.h:kasan_kmalloc
   Cyclomatic Complexity 28 include/linux/slab.h:kmalloc_index
   Cyclomatic Complexity 1 include/linux/slab.h:kmem_cache_alloc_trace
   Cyclomatic Complexity 1 include/linux/slab.h:kmalloc_order_trace
   Cyclomatic Complexity 68 include/linux/slab.h:kmalloc_large
   Cyclomatic Complexity 5 include/linux/slab.h:kmalloc
   Cyclomatic Complexity 1 include/linux/slab.h:kzalloc
   Cyclomatic Complexity 1 include/linux/pci.h:pci_get_drvdata
   Cyclomatic Complexity 1 include/linux/pci.h:pci_set_drvdata
   Cyclomatic Complexity 5 drivers/scsi//mvsas/mv_init.c:mvs_post_sas_ha_init
   Cyclomatic Complexity 8 
drivers/scsi//mvsas/mv_init.c:mvs_store_interrupt_coalescing
   Cyclomatic Complexity 1 
drivers/scsi//mvsas/mv_init.c:mvs_show_interrupt_coalescing
   Cyclomatic Complexity 1 drivers/scsi//mvsas/mv_init.c:mvs_show_driver_version
   Cyclomatic Complexity 1 arch/x86/include/asm/io.h:ioremap
   Cyclomatic Complexity 1 drivers/scsi//mvsas/mv_init.c:mvs_exit
   Cyclomatic Complexity 2 drivers/scsi//mvsas/mv_init.c:mvs_pci_alloc
   Cyclomatic Complexity 4 include/linux/dma-mapping.h:dma_free_attrs
   Cyclomatic Complexity 1 include/linux/dma-mapping.h:dma_free_coherent
   Cyclomatic Complexity 13 drivers/scsi//mvsas/mv_init.c:mvs_free
   Cyclomatic Complexity 2 drivers/scsi//mvsas/mv_init.c:mvs_pci_remove
   Cyclomatic Complexity 3 drivers/scsi//mvsas/mv_init.c:mvs_tasklet
   Cyclomatic Complexity 2 include/linux/interrupt.h:tasklet_schedule
   Cyclomatic Complexity 3 drivers/scsi//mvsas/mv_init.c:mvs_interrupt
   Cyclomatic Complexity 4 include/linux/dma-mapping.h:dma_supported
   Cyclomatic Complexity 3 include/linux/dma-mapping.h:dma_check_mask
   Cyclomatic Complexity 3 include/linux/dma-mapping.h:dma_set_mask
   Cyclomatic Complexity 1 include/linux/pci-dma-compat.h:pci_set_dma_mask
   Cyclomatic Complexity 2 include/linux/dma-mapping.h:dma_set_coherent_mask
   Cyclomatic Complexity 1 
include/linux/pci-dma-compat.h:pci_set_consistent_dma_mask
   Cyclomatic Complexity 6 drivers/scsi//mvsas/mv_init.c:pci_go_64
   Cyclomatic Complexity 5 include/linux/slab.h:kmalloc_array
   Cyclomatic Complexity 1 include/linux/slab.h:kcalloc
   Cyclomatic Complexity 3 drivers/scsi//mvsas/mv_init.c:mvs_prep_sas_ha_init
   Cyclomatic Complexity 2 drivers/scsi//mvsas/mv_init.c:mvs_init_sas_add
   Cyclomatic Complexity 1 include/scsi/scsi_host.h:scsi_add_host
   Cyclomatic Complexity 1 include/linux/interrupt.h:request_irq
   Cyclomatic Complexity 13 drivers/scsi//mvsas/mv_init.c:mvs_pci_init
   Cyclomatic Complexity 3 drivers/scsi//mvsas/mv_init.c:mvs_init
   Cyclomatic Complexity 13 drivers/scsi//mvsas/mv_init.c:mvs_ioremap
   Cyclomatic Complexity 1 drivers/scsi//mvsas/mv_init.c:mvs_iounmap

vim +/if +373 drivers/scsi//mvsas/mv_init.c

20b09c29 Andy Yan   2009-05-08  362  
6f039790 Greg Kroah-Hartman 2012-12-21  363  static struct mvs_info 
*mvs_pci_alloc(struct pci_dev *pdev,
20b09c29 Andy Yan   2009-05-08  364 const 
struct 

Re: [PATCH 1/1] scsi: mvsas:fix memory leak

2018-04-04 Thread kbuild test robot
Hi Xidong,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on mkp-scsi/for-next]
[also build test WARNING on v4.16 next-20180403]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Xidong-Wang/scsi-mvsas-fix-memory-leak/20180404-182132
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git for-next
config: i386-randconfig-x016-201813 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All warnings (new ones prefixed by >>):

   In file included from include/linux/kernel.h:10:0,
from drivers/scsi/mvsas/mv_sas.h:29,
from drivers/scsi/mvsas/mv_init.c:27:
   drivers/scsi/mvsas/mv_init.c: In function 'mvs_pci_alloc':
   include/linux/compiler.h:58:2: warning: this 'if' clause does not guard... 
[-Wmisleading-indentation]
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^
   include/linux/compiler.h:56:23: note: in expansion of macro '__trace_if'
#define if(cond, ...) __trace_if( (cond , ## __VA_ARGS__) )
  ^~
>> drivers/scsi/mvsas/mv_init.c:373:2: note: in expansion of macro 'if'
 if (!mvi)
 ^~
   drivers/scsi/mvsas/mv_init.c:375:3: note: ...this statement, but the latter 
is misleadingly indented as if it were guarded by the 'if'
  return NULL;
  ^~

vim +/if +373 drivers/scsi/mvsas/mv_init.c

20b09c29 Andy Yan   2009-05-08  362  
6f039790 Greg Kroah-Hartman 2012-12-21  363  static struct mvs_info 
*mvs_pci_alloc(struct pci_dev *pdev,
20b09c29 Andy Yan   2009-05-08  364 const 
struct pci_device_id *ent,
20b09c29 Andy Yan   2009-05-08  365 struct 
Scsi_Host *shost, unsigned int id)
20b09c29 Andy Yan   2009-05-08  366  {
84fbd0ce Xiangliang Yu  2011-05-24  367 struct mvs_info *mvi = NULL;
20b09c29 Andy Yan   2009-05-08  368 struct sas_ha_struct *sha = 
SHOST_TO_SAS_HA(shost);
20b09c29 Andy Yan   2009-05-08  369  
b89e8f53 Xiangliang Yu  2011-05-24  370 mvi = kzalloc(sizeof(*mvi) +
b89e8f53 Xiangliang Yu  2011-05-24  371 (1L << 
mvs_chips[ent->driver_data].slot_width) *
b89e8f53 Xiangliang Yu  2011-05-24  372 sizeof(struct 
mvs_slot_info), GFP_KERNEL);
20b09c29 Andy Yan   2009-05-08 @373 if (!mvi)
549079d1 Xidong Wang2018-04-04  374 scsi_host_put(shost);
20b09c29 Andy Yan   2009-05-08  375 return NULL;
dd4969a8 Jeff Garzik2009-05-08  376  
20b09c29 Andy Yan   2009-05-08  377 mvi->pdev = pdev;
20b09c29 Andy Yan   2009-05-08  378 mvi->dev = >dev;
20b09c29 Andy Yan   2009-05-08  379 mvi->chip_id = ent->driver_data;
20b09c29 Andy Yan   2009-05-08  380 mvi->chip = 
_chips[mvi->chip_id];
20b09c29 Andy Yan   2009-05-08  381 INIT_LIST_HEAD(>wq_list);
20b09c29 Andy Yan   2009-05-08  382  
20b09c29 Andy Yan   2009-05-08  383 ((struct mvs_prv_info 
*)sha->lldd_ha)->mvi[id] = mvi;
20b09c29 Andy Yan   2009-05-08  384 ((struct mvs_prv_info 
*)sha->lldd_ha)->n_phy = mvi->chip->n_phy;
20b09c29 Andy Yan   2009-05-08  385  
20b09c29 Andy Yan   2009-05-08  386 mvi->id = id;
20b09c29 Andy Yan   2009-05-08  387 mvi->sas = sha;
20b09c29 Andy Yan   2009-05-08  388 mvi->shost = shost;
20b09c29 Andy Yan   2009-05-08  389  
b89e8f53 Xiangliang Yu  2011-05-24  390 mvi->tags = 
kzalloc(MVS_CHIP_SLOT_SZ>>3, GFP_KERNEL);
b89e8f53 Xiangliang Yu  2011-05-24  391 if (!mvi->tags)
b89e8f53 Xiangliang Yu  2011-05-24  392 goto err_out;
b89e8f53 Xiangliang Yu  2011-05-24  393  
20b09c29 Andy Yan   2009-05-08  394 if 
(MVS_CHIP_DISP->chip_ioremap(mvi))
20b09c29 Andy Yan   2009-05-08  395 goto err_out;
20b09c29 Andy Yan   2009-05-08  396 if (!mvs_alloc(mvi, shost))
20b09c29 Andy Yan   2009-05-08  397 return mvi;
dd4969a8 Jeff Garzik2009-05-08  398  err_out:
dd4969a8 Jeff Garzik2009-05-08  399 mvs_free(mvi);
dd4969a8 Jeff Garzik2009-05-08  400 return NULL;
dd4969a8 Jeff Garzik2009-05-08  401  }
dd4969a8 Jeff Garzik2009-05-08  402  

:: The code at line 373 was first introduced by commit
:: 20b09c2992fefbe78f8cede7b404fb143a413c52 [SCSI] mvsas: add support for 
94xx; layout change; bug fixes

:: TO: Andy Yan <a...@marvell.com>
:: CC: James Bottomley <james.bottom...@hansenpartnership.com>

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


[PATCH 2/2] sd_zbc: Avoid errors due to sd_zbc_setup() execution

2018-04-04 Thread Damien Le Moal
From: Bart Van Assche 

Since SCSI scanning occurs asynchronously, since sd_revalidate_disk()
is called from sd_probe_async() and since sd_revalidate_disk() calls
sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called
concurrently with operations referencing a drive zone bitmaps and number
of zones. Make sure that this race does not cause failures when
revalidate does not detect any change by making the following changes to
sd_zbc_setup():
- Ensure that sd_zbc_setup_seq_zones_bitmap() does not change any
  ZBC metadata in the request queue.
- Only modify the ZBC information in the request queue that has
  changed. If the number of zones has changed, update q->nr_zones,
  q->seq_zones_wlock and q->seq_zones_bitmap. If the type of some
  zones has changed but not the number of zones, only update the
  zone type information.

Signed-off-by: Bart Van Assche 
[Damien] Updated commit message and changed nr_zones/bitmap swap order.
Signed-off-by: Damien Le Moal 
Cc: Christoph Hellwig 
Cc: Hannes Reinecke 
Cc: sta...@vger.kernel.org
---
 drivers/scsi/sd_zbc.c | 45 +++--
 1 file changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index b59454ed5087..39ddbe92769c 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -551,14 +551,13 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk 
*sdkp, unsigned char *buf,
 }
 
 /**
- * sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap.
+ * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap.
  * @sdkp: target disk
  *
  * Allocate a zone bitmap and initialize it by identifying sequential zones.
  */
-static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
+static unsigned long *sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
 {
-   struct request_queue *q = sdkp->disk->queue;
unsigned long *seq_zones_bitmap;
sector_t lba = 0;
unsigned char *buf;
@@ -566,7 +565,7 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk 
*sdkp)
 
seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp);
if (!seq_zones_bitmap)
-   return -ENOMEM;
+   return ERR_PTR(-ENOMEM);
 
buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
if (!buf)
@@ -589,12 +588,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk 
*sdkp)
kfree(buf);
if (ret) {
kfree(seq_zones_bitmap);
-   return ret;
+   return ERR_PTR(ret);
}
-
-   q->seq_zones_bitmap = seq_zones_bitmap;
-
-   return 0;
+   return seq_zones_bitmap;
 }
 
 static void sd_zbc_cleanup(struct scsi_disk *sdkp)
@@ -630,24 +626,37 @@ static int sd_zbc_setup(struct scsi_disk *sdkp)
 * of zones changed.
 */
if (sdkp->nr_zones != q->nr_zones) {
+   struct request_queue *q = sdkp->disk->queue;
+   unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
+   size_t zone_bitmap_size;
 
-   sd_zbc_cleanup(sdkp);
-
-   q->nr_zones = sdkp->nr_zones;
if (sdkp->nr_zones) {
-   q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp);
-   if (!q->seq_zones_wlock) {
+   seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp);
+   if (!seq_zones_wlock) {
ret = -ENOMEM;
goto err;
}
 
-   ret = sd_zbc_setup_seq_zones_bitmap(sdkp);
-   if (ret) {
-   sd_zbc_cleanup(sdkp);
+   seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp);
+   if (IS_ERR(seq_zones_bitmap)) {
+   ret = PTR_ERR(seq_zones_bitmap);
+   kfree(seq_zones_wlock);
goto err;
}
}
-
+   zone_bitmap_size = BITS_TO_LONGS(sdkp->nr_zones) *
+   sizeof(unsigned long);
+   if (q->nr_zones != sdkp->nr_zones) {
+   swap(q->seq_zones_wlock, seq_zones_wlock);
+   swap(q->seq_zones_bitmap, seq_zones_bitmap);
+   q->nr_zones = sdkp->nr_zones;
+   } else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap,
+ zone_bitmap_size) != 0) {
+   memcpy(q->seq_zones_bitmap, seq_zones_bitmap,
+  zone_bitmap_size);
+   }
+   kfree(seq_zones_wlock);
+   kfree(seq_zones_bitmap);
}
 
return 0;
-- 
2.14.3



[PATCH 0/2] Fix errors due to revalidation of ZBC disks

2018-04-04 Thread Damien Le Moal
The concurrent submission of commands such as a zone reset with the execution of
sd_zbc_read_zones() from sd_revalidate() context can cause the command
submissions to fail due to possible references to temporarily invalid values
such as the number of zones or the disk zone size.

This two ptches series introduces fix these problems by avoiding any change to
the disk information unless a change is detected by revalidate.

Bart Van Assche (1):
  sd_zbc: Avoid errors due to sd_zbc_setup() execution

Damien Le Moal (1):
  sd_zbc: Avoid errors due to sd_zbc_check_zone_size() execution

 drivers/scsi/sd_zbc.c | 58 +--
 1 file changed, 33 insertions(+), 25 deletions(-)

-- 
2.14.3



[PATCH 1/2] sd_zbc: Avoid errors due to sd_zbc_check_zone_size() execution

2018-04-04 Thread Damien Le Moal
When sd_revalidate() is executed for a ZBC disk (zoned block device),
sd_zbc_read_zones() is called to revalidate the disk zone configuration.
This executes sd_zbc_check_zone_size() to check that the disk zone sizes
are in line with the defined constraints (all zones must be the same
size and a power of 2 number of LBAs). As part of its execution,
sd_zbc_check_zone_size() was temporarily setting sdkp->zone_blocks to 0.
If during the execution of sd_zbc_check_zone_size() within
sd_revalidate() context, another context issues a command which
references sdkp->zone_blocks to check zone alignment of the command
(e.g. a zone reset is issued and sd_zbc_setup_reset_cmnd() is called),
an invalid value for the disk zone size is used and the alignment check
fails.

Simply fix this by using an on-stack variable inside
sd_zbc_check_zone_size() instead of directly using sdkp->zone_blocks.
This change is valid for both revalidate as well as for the first scan
of the device.

Signed-off-by: Damien Le Moal 
Cc: 
---
 drivers/scsi/sd_zbc.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 89cf4498f535..b59454ed5087 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -403,6 +403,7 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, 
unsigned char *buf)
  */
 static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 {
+   u64 sdkp_zone_blocks = sdkp->zone_blocks;
u64 zone_blocks = 0;
sector_t block = 0;
unsigned char *buf;
@@ -412,8 +413,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
int ret;
u8 same;
 
-   sdkp->zone_blocks = 0;
-
/* Get a buffer */
buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
if (!buf)
@@ -446,11 +445,11 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
/* Parse zone descriptors */
while (rec < buf + buf_len) {
zone_blocks = get_unaligned_be64([8]);
-   if (sdkp->zone_blocks == 0) {
-   sdkp->zone_blocks = zone_blocks;
-   } else if (zone_blocks != sdkp->zone_blocks &&
+   if (sdkp_zone_blocks == 0) {
+   sdkp_zone_blocks = zone_blocks;
+   } else if (zone_blocks != sdkp_zone_blocks &&
   (block + zone_blocks < sdkp->capacity
-   || zone_blocks > sdkp->zone_blocks)) {
+   || zone_blocks > sdkp_zone_blocks)) {
zone_blocks = 0;
goto out;
}
@@ -467,7 +466,7 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 
} while (block < sdkp->capacity);
 
-   zone_blocks = sdkp->zone_blocks;
+   zone_blocks = sdkp_zone_blocks;
 
 out:
if (!zone_blocks) {
-- 
2.14.3



Re: [PATCH v5] scsi: new zorro_esp.c for Amiga Zorro NCR53C9x boards

2018-04-04 Thread Michael Schmitz
Hi Christoph,

thank you for taking the time to review and comment - my responses
inline below.

Am 03.04.2018 um 19:45 schrieb Christoph Hellwig:
> Just a few style comments:
> 
>> +static int zorro_esp_irq_pending(struct esp *);
>> +static int cyber_esp_irq_pending(struct esp *);
>> +static int fastlane_esp_irq_pending(struct esp *);
> 
> Please avoid forward declarations wherever possible.
> 
>> +struct zorro_driver_data {
>> +const char *name;
>> +unsigned long offset;
>> +unsigned long dma_offset;
>> +int absolute;   /* offset is absolute address */
>> +int scsi_option;
>> +int (*irq_pending)(struct esp *esp);
>> +void (*dma_invalidate)(struct esp *esp);
>> +void (*send_dma_cmd)(struct esp *esp, u32 dma_addr, u32 esp_count,
>> + u32 dma_count, int write, u8 cmd);
>> +};
> 
> Please use different esp_driver_ops instances for the different board
> types.
> 
>> +static const struct zorro_driver_data cyberstormI_data = {
>> +.name = "CyberStormI",
>> +.offset = 0xf400,
>> +.dma_offset = 0xf800,
>> +.absolute = 0,
>> +.scsi_option = 0,
> 
> You can remove all the xero initializations on static data.
> Also please align the = signs with tabs in struct declarations.

Will do all that.

>> +static dma_addr_t zorro_esp_map_single(struct esp *esp, void *buf,
>> +  size_t sz, int dir)
>> +{
>> +return dma_map_single(esp->dev, buf, sz, dir);
>> +}
>> +
>> +static int zorro_esp_map_sg(struct esp *esp, struct scatterlist *sg,
>> +  int num_sg, int dir)
>> +{
>> +return dma_map_sg(esp->dev, sg, num_sg, dir);
>> +}
>> +
>> +static void zorro_esp_unmap_single(struct esp *esp, dma_addr_t addr,
>> +  size_t sz, int dir)
>> +{
>> +dma_unmap_single(esp->dev, addr, sz, dir);
>> +}
>> +
>> +static void zorro_esp_unmap_sg(struct esp *esp, struct scatterlist *sg,
>> +  int num_sg, int dir)
>> +{
>> +dma_unmap_sg(esp->dev, sg, num_sg, dir);
>> +}
> 
> These wrappers seem rather pointless.

They are nonetheless necessary, for two reasons as far as I can see:

ESP driver DMA ops hooks as defined in esp_scsi.h use struct esp * as
first parameter, not struct dev *. Changing that would require changing
all ESP based drivers (sun_esp.c, jazz_esp.c,  am53c974.c, sun3_esp.c,
mac_esp.c) and the driver core. I'd like to hear Dave's view on that.

The generic dma_map_* and dma_unmap_* ops are defined as inline
functions in dma-mapping.h. There may be a good reason for that choice.
You probably know better than me what the reason for inlining was.

All other ESP drivers use the same wrapper mechanism, and most of them
don't do more than dereference the struct device pointer and pass that
and the rest of parameters on to the generic or PCI API hooks. I see no
reason to deviate from that convention.

>> +/* PIO macros as used in mac_esp.c.
>> + * Note that addr and fifo arguments are local-scope variables declared
>> + * in zorro_esp_send_pio_cmd(), the macros are only used in that function,
>> + * and addr and fifo are referenced in each use of the macros so there
>> + * is no need to pass them as macro parameters.
>> + */
> 
> Please use normal Linux comment style:
> 
> /*
>  * Blah, blah, blah.
>  */

Thanks, done.

>> +#define ZORRO_ESP_PIO_LOOP(operands, reg1) \
>> +{ \
>> +asm volatile ( \
>> + "1: moveb " operands "\n" \
>> + "   subqw #1,%1   \n" \
>> + "   jbne 1b   \n" \
>> + : "+a" (addr), "+r" (reg1) \
>> + : "a" (fifo)); \
>> +}
> 
> What is the point of the curly braces around the asm statement?

My overeager attempt at shutting up checkpatch ('need braces around
complex macro'). I'll revert to what Finn used in mac_esp.c (which is
where this code was taken from). That did pass review at the time, hope
this counts for something.

>> +static void zorro_esp_send_pio_cmd(struct esp *esp, u32 addr, u32 esp_count,
>> + u32 dma_count, int write, u8 cmd)
>> +{
>> +struct zorro_esp_priv *zep = dev_get_drvdata(esp->dev);
>> +u8 __iomem *fifo = esp->regs + ESP_FDATA * 16;
>> +u8 phase = esp->sreg & ESP_STAT_PMASK;
>> +
>> +cmd &= ~ESP_CMD_DMA;
>> +
>> +if (write) {
> 
> It seems like this routine would benefit from being split into a read
> and a write one.

The ESP driver uses just a single send_dma_cmd() function, so we'd have
to conditionalize which of the two PIO functions to call from within
each board specific send_dma_cmd(). IMO that makes the code harder to
follow. (I wouldn't go as far as Finn and add another layer of
indirection, but the impact is much the same.)

Finn also provided another justification for using a single routine for
read and write - we plan to share that code between mac_esp.c and
zorro_esp.c so I'd rather keep it as close to the mac_esp.c version as
possible.

>> +// Blizzard 1230/60 

Re: [PATCH] scsi: Fix failed request error code

2018-04-04 Thread Damien Le Moal


On 4/4/18 16:39, Damien Le Moal wrote:
> Hannes,
> 
> On 4/4/18 16:35, Hannes Reinecke wrote:
>> On Wed, 4 Apr 2018 07:06:58 +
>> Damien Le Moal  wrote:
>>
>>> Hannes,
>>>
>>> On 4/4/18 15:57, Hannes Reinecke wrote:
 On Wed,  4 Apr 2018 15:51:38 +0900
 Damien Le Moal  wrote:
   
> With the introduction of commit e39a97353e53 ("scsi: core: return
> BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"), a command
> that failed with hostbyte=DID_OK and driverbyte=DRIVER_SENSE but
> lacking additional sense information will have a return code set to
> BLK_STS_OK. This results in the request issuer to see successful
> request execution despite the failure. An example of such case is
> an unaligned write on a host managed ZAC disk connected to a SAS
> HBA with a malfunctioning SAT. The unaligned write command gets
> aborted but has no additional sense information.
>
> sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK
> driverbyte=DRIVER_SENSE sd 10:0:0:0: [sde] tag#3905 Sense Key :
> Aborted Command [current] sd 10:0:0:0: [sde] tag#3905 Add. Sense:
> No additional sense information sd 10:0:0:0: [sde] tag#3905 CDB:
> Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 00 01 00 00
> print_req_error: I/O error, dev sde, sector 274726920
>
> In scsi_io_completion(), sense key handling to not change the
> request error code and success being reported to the issuer.
>
> Fix this by making sure that the error code always indicates an
> error if scsi_io_completion() decide that the action to be taken
> for a failed command is to not retry it and terminate it
> immediately (ACTION_FAIL) .
>
> Signed-off-by: Damien Le Moal 
> Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in
> __scsi_error_from_host_byte()") Cc: Hannes Reinecke 
> Cc: 
> ---
>  drivers/scsi/scsi_lib.c | 9 +
>  1 file changed, 9 insertions(+)
>
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index c84f931388f2..87579bfcc186 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -1002,6 +1002,15 @@ void scsi_io_completion(struct scsi_cmnd
> *cmd, unsigned int good_bytes) scsi_print_command(cmd);
>   }
>   }
> + /*
> +  * The command failed and should not be retried.
> If the host
> +  * byte is DID_OK, then
> __scsi_error_from_host_byte() returned
> +  * BLK_STS_OK and error indicates a success. Make
> sure to not
> +  * use that as the completion code and always
> return an
> +  * I/O error.
> +  */
> + if (error == BLK_STS_OK)
> + error = BLK_STS_IOERR;
>   if (!scsi_end_request(req, error,
> blk_rq_err_bytes(req), 0)) return;
>   /*FALLTHRU*/  

 That looks wrong.
 Shouldn't __scsi_error_from_host_byte() return the correct status
 here?  
>>>
>>> My drive said:
>>>
>>> sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK
>>> driverbyte=DRIVER_SENSE
>>> sd 10:0:0:0: [sde] tag#3905 Sense Key : Aborted Command [current]
>>> sd 10:0:0:0: [sde] tag#3905 Add. Sense: No additional sense
>>> information sd 10:0:0:0: [sde] tag#3905 CDB: Write(16) 8a 00 00 00 00
>>> 00 02 0c 00 01 00 00 00 01 00 00
>>>
>>> Since hostbyte is DID_OK, __scsi_error_from_host_byte() returns
>>> BLK_STS_OK. The HBA fails to give sense data, so the ABORTED_COMMAND
>>> case in scsi_io_completion() "switch (sshdr.sense_key)" does nothing
>>> and error stays equal to success. scsi_end_request() gets called with
>>> that and dd sees a success...
>>>
>>> There are also plenty of other sense keys cases where error is not
>>> changed despite the fact that error can be BLK_STS_SUCCESS (in fact, I
>>> think this is likely the most common case since an command failure
>>> with hostbyte=DID_OK and driverbyte=DRIVER_SENSE is probably the most
>>> common one.
>>>
>>> My patch is a bit of a hammer and makes sure that an ACTION_FAIL
>>> request is completed as a failure... Am I getting all this wrong ?
>>>
>>
>> Just checked with the code, and send a new patch.
>> Which I find a bit clearer.
>>
>> Fact is that __scsi_error_from_host_byte() is not sufficient to
>> evaluate the return code, so taking its return value with any further
>> checks is indeed wrong.
>> But then it never said so on the boilerplate of
>> __scsi_error_from_host_byte() ...
>>
>> So please do check the 'v2' version of the patch.
>>
> 
> It fixes the particular problem I am seeing.
> But looking more at this code, isn't there a problem with
> sshdr.sense_key == RECOVERED_ERROR ?
> 
> if (sense_valid && (sshdr.sense_key == RECOVERED_ERROR)) {
>   ...
>   result = 0;
>   

Re: [PATCH] scsi: Fix failed request error code

2018-04-04 Thread Damien Le Moal
Hannes,

On 4/4/18 16:35, Hannes Reinecke wrote:
> On Wed, 4 Apr 2018 07:06:58 +
> Damien Le Moal  wrote:
> 
>> Hannes,
>>
>> On 4/4/18 15:57, Hannes Reinecke wrote:
>>> On Wed,  4 Apr 2018 15:51:38 +0900
>>> Damien Le Moal  wrote:
>>>   
 With the introduction of commit e39a97353e53 ("scsi: core: return
 BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"), a command
 that failed with hostbyte=DID_OK and driverbyte=DRIVER_SENSE but
 lacking additional sense information will have a return code set to
 BLK_STS_OK. This results in the request issuer to see successful
 request execution despite the failure. An example of such case is
 an unaligned write on a host managed ZAC disk connected to a SAS
 HBA with a malfunctioning SAT. The unaligned write command gets
 aborted but has no additional sense information.

 sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK
 driverbyte=DRIVER_SENSE sd 10:0:0:0: [sde] tag#3905 Sense Key :
 Aborted Command [current] sd 10:0:0:0: [sde] tag#3905 Add. Sense:
 No additional sense information sd 10:0:0:0: [sde] tag#3905 CDB:
 Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 00 01 00 00
 print_req_error: I/O error, dev sde, sector 274726920

 In scsi_io_completion(), sense key handling to not change the
 request error code and success being reported to the issuer.

 Fix this by making sure that the error code always indicates an
 error if scsi_io_completion() decide that the action to be taken
 for a failed command is to not retry it and terminate it
 immediately (ACTION_FAIL) .

 Signed-off-by: Damien Le Moal 
 Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in
 __scsi_error_from_host_byte()") Cc: Hannes Reinecke 
 Cc: 
 ---
  drivers/scsi/scsi_lib.c | 9 +
  1 file changed, 9 insertions(+)

 diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
 index c84f931388f2..87579bfcc186 100644
 --- a/drivers/scsi/scsi_lib.c
 +++ b/drivers/scsi/scsi_lib.c
 @@ -1002,6 +1002,15 @@ void scsi_io_completion(struct scsi_cmnd
 *cmd, unsigned int good_bytes) scsi_print_command(cmd);
}
}
 +  /*
 +   * The command failed and should not be retried.
 If the host
 +   * byte is DID_OK, then
 __scsi_error_from_host_byte() returned
 +   * BLK_STS_OK and error indicates a success. Make
 sure to not
 +   * use that as the completion code and always
 return an
 +   * I/O error.
 +   */
 +  if (error == BLK_STS_OK)
 +  error = BLK_STS_IOERR;
if (!scsi_end_request(req, error,
 blk_rq_err_bytes(req), 0)) return;
/*FALLTHRU*/  
>>>
>>> That looks wrong.
>>> Shouldn't __scsi_error_from_host_byte() return the correct status
>>> here?  
>>
>> My drive said:
>>
>> sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK
>> driverbyte=DRIVER_SENSE
>> sd 10:0:0:0: [sde] tag#3905 Sense Key : Aborted Command [current]
>> sd 10:0:0:0: [sde] tag#3905 Add. Sense: No additional sense
>> information sd 10:0:0:0: [sde] tag#3905 CDB: Write(16) 8a 00 00 00 00
>> 00 02 0c 00 01 00 00 00 01 00 00
>>
>> Since hostbyte is DID_OK, __scsi_error_from_host_byte() returns
>> BLK_STS_OK. The HBA fails to give sense data, so the ABORTED_COMMAND
>> case in scsi_io_completion() "switch (sshdr.sense_key)" does nothing
>> and error stays equal to success. scsi_end_request() gets called with
>> that and dd sees a success...
>>
>> There are also plenty of other sense keys cases where error is not
>> changed despite the fact that error can be BLK_STS_SUCCESS (in fact, I
>> think this is likely the most common case since an command failure
>> with hostbyte=DID_OK and driverbyte=DRIVER_SENSE is probably the most
>> common one.
>>
>> My patch is a bit of a hammer and makes sure that an ACTION_FAIL
>> request is completed as a failure... Am I getting all this wrong ?
>>
> 
> Just checked with the code, and send a new patch.
> Which I find a bit clearer.
> 
> Fact is that __scsi_error_from_host_byte() is not sufficient to
> evaluate the return code, so taking its return value with any further
> checks is indeed wrong.
> But then it never said so on the boilerplate of
> __scsi_error_from_host_byte() ...
> 
> So please do check the 'v2' version of the patch.
> 

It fixes the particular problem I am seeing.
But looking more at this code, isn't there a problem with
sshdr.sense_key == RECOVERED_ERROR ?

if (sense_valid && (sshdr.sense_key == RECOVERED_ERROR)) {
...
result = 0;
/* for passthrough error may be set */
error = BLK_STS_OK;
}

Then the error will be changed wrongly to BLK_STS_IOERR.

My original 

Re: [PATCH] scsi: Fix failed request error code

2018-04-04 Thread Hannes Reinecke
On Wed, 4 Apr 2018 07:06:58 +
Damien Le Moal  wrote:

> Hannes,
> 
> On 4/4/18 15:57, Hannes Reinecke wrote:
> > On Wed,  4 Apr 2018 15:51:38 +0900
> > Damien Le Moal  wrote:
> >   
> >> With the introduction of commit e39a97353e53 ("scsi: core: return
> >> BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"), a command
> >> that failed with hostbyte=DID_OK and driverbyte=DRIVER_SENSE but
> >> lacking additional sense information will have a return code set to
> >> BLK_STS_OK. This results in the request issuer to see successful
> >> request execution despite the failure. An example of such case is
> >> an unaligned write on a host managed ZAC disk connected to a SAS
> >> HBA with a malfunctioning SAT. The unaligned write command gets
> >> aborted but has no additional sense information.
> >>
> >> sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK
> >> driverbyte=DRIVER_SENSE sd 10:0:0:0: [sde] tag#3905 Sense Key :
> >> Aborted Command [current] sd 10:0:0:0: [sde] tag#3905 Add. Sense:
> >> No additional sense information sd 10:0:0:0: [sde] tag#3905 CDB:
> >> Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 00 01 00 00
> >> print_req_error: I/O error, dev sde, sector 274726920
> >>
> >> In scsi_io_completion(), sense key handling to not change the
> >> request error code and success being reported to the issuer.
> >>
> >> Fix this by making sure that the error code always indicates an
> >> error if scsi_io_completion() decide that the action to be taken
> >> for a failed command is to not retry it and terminate it
> >> immediately (ACTION_FAIL) .
> >>
> >> Signed-off-by: Damien Le Moal 
> >> Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in
> >> __scsi_error_from_host_byte()") Cc: Hannes Reinecke 
> >> Cc: 
> >> ---
> >>  drivers/scsi/scsi_lib.c | 9 +
> >>  1 file changed, 9 insertions(+)
> >>
> >> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> >> index c84f931388f2..87579bfcc186 100644
> >> --- a/drivers/scsi/scsi_lib.c
> >> +++ b/drivers/scsi/scsi_lib.c
> >> @@ -1002,6 +1002,15 @@ void scsi_io_completion(struct scsi_cmnd
> >> *cmd, unsigned int good_bytes) scsi_print_command(cmd);
> >>}
> >>}
> >> +  /*
> >> +   * The command failed and should not be retried.
> >> If the host
> >> +   * byte is DID_OK, then
> >> __scsi_error_from_host_byte() returned
> >> +   * BLK_STS_OK and error indicates a success. Make
> >> sure to not
> >> +   * use that as the completion code and always
> >> return an
> >> +   * I/O error.
> >> +   */
> >> +  if (error == BLK_STS_OK)
> >> +  error = BLK_STS_IOERR;
> >>if (!scsi_end_request(req, error,
> >> blk_rq_err_bytes(req), 0)) return;
> >>/*FALLTHRU*/  
> > 
> > That looks wrong.
> > Shouldn't __scsi_error_from_host_byte() return the correct status
> > here?  
> 
> My drive said:
> 
> sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK
> driverbyte=DRIVER_SENSE
> sd 10:0:0:0: [sde] tag#3905 Sense Key : Aborted Command [current]
> sd 10:0:0:0: [sde] tag#3905 Add. Sense: No additional sense
> information sd 10:0:0:0: [sde] tag#3905 CDB: Write(16) 8a 00 00 00 00
> 00 02 0c 00 01 00 00 00 01 00 00
> 
> Since hostbyte is DID_OK, __scsi_error_from_host_byte() returns
> BLK_STS_OK. The HBA fails to give sense data, so the ABORTED_COMMAND
> case in scsi_io_completion() "switch (sshdr.sense_key)" does nothing
> and error stays equal to success. scsi_end_request() gets called with
> that and dd sees a success...
> 
> There are also plenty of other sense keys cases where error is not
> changed despite the fact that error can be BLK_STS_SUCCESS (in fact, I
> think this is likely the most common case since an command failure
> with hostbyte=DID_OK and driverbyte=DRIVER_SENSE is probably the most
> common one.
> 
> My patch is a bit of a hammer and makes sure that an ACTION_FAIL
> request is completed as a failure... Am I getting all this wrong ?
> 

Just checked with the code, and send a new patch.
Which I find a bit clearer.

Fact is that __scsi_error_from_host_byte() is not sufficient to
evaluate the return code, so taking its return value with any further
checks is indeed wrong.
But then it never said so on the boilerplate of
__scsi_error_from_host_byte() ...

So please do check the 'v2' version of the patch.

Cheers,

Hannes



[PATCHv2] scsi: Fix failed request error code

2018-04-04 Thread Hannes Reinecke
With the introduction of commit e39a97353e53 ("scsi: core: return
BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"), a command that
failed with hostbyte=DID_OK and driverbyte=DRIVER_SENSE but lacking
additional sense information will have a return code set to BLK_STS_OK.
This results in the request issuer to see successful request execution
despite the failure. An example of such case is an unaligned write on a
host managed ZAC disk connected to a SAS HBA with a malfunctioning SAT.
The unaligned write command gets aborted but has no additional sense
information.

sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK 
driverbyte=DRIVER_SENSE
sd 10:0:0:0: [sde] tag#3905 Sense Key : Aborted Command [current]
sd 10:0:0:0: [sde] tag#3905 Add. Sense: No additional sense information
sd 10:0:0:0: [sde] tag#3905 CDB: Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 
00 01 00 00
print_req_error: I/O error, dev sde, sector 274726920

In scsi_io_completion(), sense key handling to not change the request
error code and success being reported to the issuer.

Fix this by making sure that the error code always indicates an error
if scsi_io_completion() decide that the action to be taken for a failed
command is to not retry it and terminate it immediately (ACTION_FAIL) .

Signed-off-by: Damien Le Moal 
Signed-off-by: Hannes Reinecke 
Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in 
__scsi_error_from_host_byte()")
Cc: 
---
 drivers/scsi/scsi_lib.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 393f9db8f41b..9389c41e2829 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -905,6 +905,12 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned 
int good_bytes)
goto requeue;
 
error = __scsi_error_from_host_byte(cmd, result);
+   /*
+* If the hostbyte was DID_OK, but the sense code is valid
+* we always should set BLK_STS_IOERR.
+*/
+   if (error == BLK_STS_OK && sense_valid)
+   error = BLK_STS_IOERR;
 
if (host_byte(result) == DID_RESET) {
/* Third party bus reset or reset for error recovery
-- 
2.12.3



Re: [PATCH] scsi: Fix failed request error code

2018-04-04 Thread Damien Le Moal
Hannes,

On 4/4/18 15:57, Hannes Reinecke wrote:
> On Wed,  4 Apr 2018 15:51:38 +0900
> Damien Le Moal  wrote:
> 
>> With the introduction of commit e39a97353e53 ("scsi: core: return
>> BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"), a command
>> that failed with hostbyte=DID_OK and driverbyte=DRIVER_SENSE but
>> lacking additional sense information will have a return code set to
>> BLK_STS_OK. This results in the request issuer to see successful
>> request execution despite the failure. An example of such case is an
>> unaligned write on a host managed ZAC disk connected to a SAS HBA
>> with a malfunctioning SAT. The unaligned write command gets aborted
>> but has no additional sense information.
>>
>> sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK
>> driverbyte=DRIVER_SENSE sd 10:0:0:0: [sde] tag#3905 Sense Key :
>> Aborted Command [current] sd 10:0:0:0: [sde] tag#3905 Add. Sense: No
>> additional sense information sd 10:0:0:0: [sde] tag#3905 CDB:
>> Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 00 01 00 00
>> print_req_error: I/O error, dev sde, sector 274726920
>>
>> In scsi_io_completion(), sense key handling to not change the request
>> error code and success being reported to the issuer.
>>
>> Fix this by making sure that the error code always indicates an error
>> if scsi_io_completion() decide that the action to be taken for a
>> failed command is to not retry it and terminate it immediately
>> (ACTION_FAIL) .
>>
>> Signed-off-by: Damien Le Moal 
>> Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in
>> __scsi_error_from_host_byte()") Cc: Hannes Reinecke 
>> Cc: 
>> ---
>>  drivers/scsi/scsi_lib.c | 9 +
>>  1 file changed, 9 insertions(+)
>>
>> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
>> index c84f931388f2..87579bfcc186 100644
>> --- a/drivers/scsi/scsi_lib.c
>> +++ b/drivers/scsi/scsi_lib.c
>> @@ -1002,6 +1002,15 @@ void scsi_io_completion(struct scsi_cmnd *cmd,
>> unsigned int good_bytes) scsi_print_command(cmd);
>>  }
>>  }
>> +/*
>> + * The command failed and should not be retried. If
>> the host
>> + * byte is DID_OK, then
>> __scsi_error_from_host_byte() returned
>> + * BLK_STS_OK and error indicates a success. Make
>> sure to not
>> + * use that as the completion code and always return
>> an
>> + * I/O error.
>> + */
>> +if (error == BLK_STS_OK)
>> +error = BLK_STS_IOERR;
>>  if (!scsi_end_request(req, error,
>> blk_rq_err_bytes(req), 0)) return;
>>  /*FALLTHRU*/
> 
> That looks wrong.
> Shouldn't __scsi_error_from_host_byte() return the correct status here?

My drive said:

sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK
driverbyte=DRIVER_SENSE
sd 10:0:0:0: [sde] tag#3905 Sense Key : Aborted Command [current]
sd 10:0:0:0: [sde] tag#3905 Add. Sense: No additional sense information
sd 10:0:0:0: [sde] tag#3905 CDB: Write(16) 8a 00 00 00 00 00 02 0c 00 01
00 00 00 01 00 00

Since hostbyte is DID_OK, __scsi_error_from_host_byte() returns
BLK_STS_OK. The HBA fails to give sense data, so the ABORTED_COMMAND
case in scsi_io_completion() "switch (sshdr.sense_key)" does nothing and
error stays equal to success. scsi_end_request() gets called with that
and dd sees a success...

There are also plenty of other sense keys cases where error is not
changed despite the fact that error can be BLK_STS_SUCCESS (in fact, I
think this is likely the most common case since an command failure with
hostbyte=DID_OK and driverbyte=DRIVER_SENSE is probably the most common one.

My patch is a bit of a hammer and makes sure that an ACTION_FAIL request
is completed as a failure... Am I getting all this wrong ?

Best.

-- 
Damien Le Moal,
Western Digital

Re: [PATCH] scsi: Fix failed request error code

2018-04-04 Thread Hannes Reinecke
On Wed,  4 Apr 2018 15:51:38 +0900
Damien Le Moal  wrote:

> With the introduction of commit e39a97353e53 ("scsi: core: return
> BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"), a command
> that failed with hostbyte=DID_OK and driverbyte=DRIVER_SENSE but
> lacking additional sense information will have a return code set to
> BLK_STS_OK. This results in the request issuer to see successful
> request execution despite the failure. An example of such case is an
> unaligned write on a host managed ZAC disk connected to a SAS HBA
> with a malfunctioning SAT. The unaligned write command gets aborted
> but has no additional sense information.
> 
> sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK
> driverbyte=DRIVER_SENSE sd 10:0:0:0: [sde] tag#3905 Sense Key :
> Aborted Command [current] sd 10:0:0:0: [sde] tag#3905 Add. Sense: No
> additional sense information sd 10:0:0:0: [sde] tag#3905 CDB:
> Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 00 01 00 00
> print_req_error: I/O error, dev sde, sector 274726920
> 
> In scsi_io_completion(), sense key handling to not change the request
> error code and success being reported to the issuer.
> 
> Fix this by making sure that the error code always indicates an error
> if scsi_io_completion() decide that the action to be taken for a
> failed command is to not retry it and terminate it immediately
> (ACTION_FAIL) .
> 
> Signed-off-by: Damien Le Moal 
> Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in
> __scsi_error_from_host_byte()") Cc: Hannes Reinecke 
> Cc: 
> ---
>  drivers/scsi/scsi_lib.c | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index c84f931388f2..87579bfcc186 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -1002,6 +1002,15 @@ void scsi_io_completion(struct scsi_cmnd *cmd,
> unsigned int good_bytes) scsi_print_command(cmd);
>   }
>   }
> + /*
> +  * The command failed and should not be retried. If
> the host
> +  * byte is DID_OK, then
> __scsi_error_from_host_byte() returned
> +  * BLK_STS_OK and error indicates a success. Make
> sure to not
> +  * use that as the completion code and always return
> an
> +  * I/O error.
> +  */
> + if (error == BLK_STS_OK)
> + error = BLK_STS_IOERR;
>   if (!scsi_end_request(req, error,
> blk_rq_err_bytes(req), 0)) return;
>   /*FALLTHRU*/

That looks wrong.
Shouldn't __scsi_error_from_host_byte() return the correct status here?

Cheers,

Hannes


[PATCH] scsi: Fix failed request error code

2018-04-04 Thread Damien Le Moal
With the introduction of commit e39a97353e53 ("scsi: core: return
BLK_STS_OK for DID_OK in __scsi_error_from_host_byte()"), a command that
failed with hostbyte=DID_OK and driverbyte=DRIVER_SENSE but lacking
additional sense information will have a return code set to BLK_STS_OK.
This results in the request issuer to see successful request execution
despite the failure. An example of such case is an unaligned write on a
host managed ZAC disk connected to a SAS HBA with a malfunctioning SAT.
The unaligned write command gets aborted but has no additional sense
information.

sd 10:0:0:0: [sde] tag#3905 FAILED Result: hostbyte=DID_OK 
driverbyte=DRIVER_SENSE
sd 10:0:0:0: [sde] tag#3905 Sense Key : Aborted Command [current]
sd 10:0:0:0: [sde] tag#3905 Add. Sense: No additional sense information
sd 10:0:0:0: [sde] tag#3905 CDB: Write(16) 8a 00 00 00 00 00 02 0c 00 01 00 00 
00 01 00 00
print_req_error: I/O error, dev sde, sector 274726920

In scsi_io_completion(), sense key handling to not change the request
error code and success being reported to the issuer.

Fix this by making sure that the error code always indicates an error
if scsi_io_completion() decide that the action to be taken for a failed
command is to not retry it and terminate it immediately (ACTION_FAIL) .

Signed-off-by: Damien Le Moal 
Fixes: e39a97353e53 ("scsi: core: return BLK_STS_OK for DID_OK in 
__scsi_error_from_host_byte()")
Cc: Hannes Reinecke 
Cc: 
---
 drivers/scsi/scsi_lib.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index c84f931388f2..87579bfcc186 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1002,6 +1002,15 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned 
int good_bytes)
scsi_print_command(cmd);
}
}
+   /*
+* The command failed and should not be retried. If the host
+* byte is DID_OK, then __scsi_error_from_host_byte() returned
+* BLK_STS_OK and error indicates a success. Make sure to not
+* use that as the completion code and always return an
+* I/O error.
+*/
+   if (error == BLK_STS_OK)
+   error = BLK_STS_IOERR;
if (!scsi_end_request(req, error, blk_rq_err_bytes(req), 0))
return;
/*FALLTHRU*/
-- 
2.14.3