Re: [PATCH] ibmvscsi:Remove no longer required comment for the function send_mad_adapter_info

2015-07-16 Thread Tyrel Datwyler
On 07/09/2015 10:24 AM, Nicholas Krause wrote:
 This removes the no longer required comment for the function
 send_mad_adapter_info stating that it always returns zero due
 to this function being declared as void and thus never returning
 any useful value.
 
 Signed-off-by: Nicholas Krause xerofo...@gmail.com

Acked-by: Tyrel Datwyler tyr...@linux.vnet.ibm.com

 ---
  drivers/scsi/ibmvscsi/ibmvscsi.c | 1 -
  1 file changed, 1 deletion(-)
 
 diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c 
 b/drivers/scsi/ibmvscsi/ibmvscsi.c
 index 6a41c36..70ea976 100644
 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c
 +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
 @@ -1423,7 +1423,6 @@ static void adapter_info_rsp(struct srp_event_struct 
 *evt_struct)
   *  returned SRP version doesn't match ours.
   * @hostdata:ibmvscsi_host_data of host
   * 
 - * Returns zero if successful.
  */
  static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata)
  {
 

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 3/4] target: Return descriptor format sense data in case the LU spans 64bit sectors

2015-07-16 Thread Sagi Grimberg
In case a LU spans 64bit sectors, fixed size sense data information
field is only 32 bits which means the sector information will be truncated.

Thus, if the LU spans 64bit sectors, use descriptor format sense data to
correctly report sector information.

Reported-by: Christoph Hellwig h...@lst.de
Signed-off-by: Sagi Grimberg sa...@mellanox.com
---
 drivers/target/target_core_hba.c   |  5 +
 drivers/target/target_core_spc.c   | 12 +---
 drivers/target/target_core_transport.c |  3 ++-
 include/target/target_core_backend.h   |  2 ++
 4 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/target/target_core_hba.c b/drivers/target/target_core_hba.c
index 62ea4e8..d746a3a 100644
--- a/drivers/target/target_core_hba.c
+++ b/drivers/target/target_core_hba.c
@@ -176,3 +176,8 @@ core_delete_hba(struct se_hba *hba)
kfree(hba);
return 0;
 }
+
+bool target_sense_desc_format(struct se_device *dev)
+{
+   return dev->transport->get_blocks(dev) > U32_MAX;
+}
diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index c43dcbf..b949d33 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -761,7 +761,12 @@ static int spc_modesense_control(struct se_cmd *cmd, u8 
pc, u8 *p)
if (pc == 1)
goto out;
 
-   p[2] = 2;
+   /* GLTSD: No implicit save of log parameters */
+   p[2] = (1 << 1);
+   if (target_sense_desc_format(dev))
+   /* D_SENSE: Descriptor format sense data for 64bit sectors */
+   p[2] |= (1 << 2);
+
/*
 * From spc4r23, 7.4.7 Control mode page
 *
@@ -1144,6 +1149,7 @@ static sense_reason_t spc_emulate_request_sense(struct 
se_cmd *cmd)
unsigned char *rbuf;
u8 ua_asc = 0, ua_ascq = 0;
unsigned char buf[SE_SENSE_BUF];
+   bool desc_format = target_sense_desc_format(cmd-se_dev);
 
memset(buf, 0, SE_SENSE_BUF);
 
@@ -1158,10 +1164,10 @@ static sense_reason_t spc_emulate_request_sense(struct 
se_cmd *cmd)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
if (!core_scsi3_ua_clear_for_request_sense(cmd, ua_asc, ua_ascq))
-   scsi_build_sense_buffer(0, buf, UNIT_ATTENTION,
+   scsi_build_sense_buffer(desc_format, buf, UNIT_ATTENTION,
ua_asc, ua_ascq);
else
-   scsi_build_sense_buffer(0, buf, NO_SENSE, 0x0, 0x0);
+   scsi_build_sense_buffer(desc_format, buf, NO_SENSE, 0x0, 0x0);
 
memcpy(rbuf, buf, min_t(u32, sizeof(buf), cmd-data_length));
transport_kunmap_data_sg(cmd);
diff --git a/drivers/target/target_core_transport.c 
b/drivers/target/target_core_transport.c
index 7fb031b..98155db 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -2735,6 +2735,7 @@ static int translate_sense_reason(struct se_cmd *cmd, 
sense_reason_t reason)
u8 *buffer = cmd-sense_buffer;
int r = (__force int)reason;
u8 asc, ascq;
+   bool desc_format = target_sense_desc_format(cmd-se_dev);
 
if (r  ARRAY_SIZE(sense_info_table)  sense_info_table[r].key)
si = sense_info_table[r];
@@ -2754,7 +2755,7 @@ static int translate_sense_reason(struct se_cmd *cmd, 
sense_reason_t reason)
ascq = si-ascq;
}
 
-   scsi_build_sense_buffer(0, buffer, si-key, asc, ascq);
+   scsi_build_sense_buffer(desc_format, buffer, si-key, asc, ascq);
if (si-add_sector_info)
return scsi_set_sense_information(buffer,
  cmd-scsi_sense_length,
diff --git a/include/target/target_core_backend.h 
b/include/target/target_core_backend.h
index 1e5c8f9..56cf8e4 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -93,4 +93,6 @@ bool  target_lun_is_rdonly(struct se_cmd *);
 sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd,
sense_reason_t (*exec_cmd)(struct se_cmd *cmd));
 
+bool target_sense_desc_format(struct se_device *dev);
+
 #endif /* TARGET_CORE_BACKEND_H */
-- 
1.8.4.3

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] scsi: fix hang in scsi error handling

2015-07-16 Thread Hannes Reinecke
On 07/15/2015 02:47 PM, Kevin Groeneveld wrote:
 With the following setup/steps I can consistently trigger the scsi host to
 hang requiring a reboot:
 1. iMX6Q processor with built in AHCI compatible SATA host
 2. SATA port multiplier in CBS mode connected to iMX6Q
 3. HDD connected to port multiplier
 4. CDROM connected to port multiplier
 5. trigger continuous I/O to HDD
 6. repeatedly execute CDROM_DRIVE_STATUS ioctl on CDROM with no disc in
drive
 
 I don't think this issue is iMX6 specific but that is the only platform
 I have duplicated the hang on.
 
 To trigger the issue at least two CPU cores must be enabled and the HDD
 access and CDROM ioctls must be happening concurrently. If I only enable
 one CPU core the hang does not occur.
 
 The following C program can be used to trigger the CDROM ioctl:
 
 #include <stdio.h>
 #include <fcntl.h>
 #include <linux/cdrom.h>
 
 int main(int argc, char* argv[])
 {
   int fd;
 
   fd = open("/dev/cdrom", O_RDONLY | O_NONBLOCK);
   if(fd < 0)
   {
   perror("cannot open /dev/cdrom");
   return fd;
   }
 
   for(;;)
   {
   ioctl(fd, CDROM_DRIVE_STATUS, 0);
   usleep(100 * 1000);
   }
 }
 
 When the hang occurs shost->host_busy == 2 and shost->host_failed == 1 in
 the scsi_eh_wakeup function. However this function only wakes the error
 handler if host_busy == host_failed.
 
Which just means that one command is still outstanding, and we need
to wait for it to complete.
But see below...

 The patch changes the condition to test if host_busy <= host_failed and
 updates the corresponding condition in scsi_error_handler. Without the
 patch I can trigger the hang within seconds. With the patch I have not
 duplicated the hang after hours of testing.
 
 Signed-off-by: Kevin Groeneveld kgroenev...@lenbrook.com
 ---
  drivers/scsi/scsi_error.c |4 ++--
  1 files changed, 2 insertions(+), 2 deletions(-)
 
 diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
 index 106884a..853964b 100644
 --- a/drivers/scsi/scsi_error.c
 +++ b/drivers/scsi/scsi_error.c
 @@ -61,7 +61,7 @@ static int scsi_try_to_abort_cmd(struct scsi_host_template 
 *,
  /* called with shost-host_lock held */
  void scsi_eh_wakeup(struct Scsi_Host *shost)
  {
 - if (atomic_read(shost-host_busy) == shost-host_failed) {
 + if (atomic_read(shost-host_busy) = shost-host_failed) {
   trace_scsi_eh_wakeup(shost);
   wake_up_process(shost-ehandler);
   SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost,
 @@ -2173,7 +2173,7 @@ int scsi_error_handler(void *data)
   while (!kthread_should_stop()) {
   set_current_state(TASK_INTERRUPTIBLE);
   if ((shost-host_failed == 0  shost-host_eh_scheduled == 0) 
 ||
 - shost-host_failed != atomic_read(shost-host_busy)) {
 + shost-host_failed  atomic_read(shost-host_busy)) {
   SCSI_LOG_ERROR_RECOVERY(1,
   shost_printk(KERN_INFO, shost,
scsi_eh_%d: sleeping\n,
 
Hmm.
I am really not sure about this.

'host_busy' indicates the number of outstanding commands, and
'host_failed' is the number of commands which have failed (on the
ground that failed commands are considered outstanding, too).

So the first hunk would change the behaviour from
'start SCSI EH once all commands are completed or failed' to
'start SCSI EH for _any_ command if scsi_eh_wakeup is called'
(note that shost_failed might be '0'...).
Which doesn't sound right.

The second hunk seems to be okay, as in principle 'host_busy' could
have been decreased before the check is done (ie someone could have
called ->done on a failed command).
But even so this would point to an invalid command completion; as
soon as a command is marked as 'failed' control is back in the SCSI
midlayer, and no-one else should be tampering with it.

I guess this needs further debugging to get to the bottom of it.

Sorry, but:

NACK.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                zSeries & Storage
h...@suse.de   +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] sd: Fix maximum I/O size for BLOCK_PC requests

2015-07-16 Thread Hannes Reinecke
On 07/16/2015 03:13 AM, Martin K. Petersen wrote:
 Commit bcdb247c6b6a ("sd: Limit transfer length") clamped the maximum
 size of an I/O request to the MAXIMUM TRANSFER LENGTH field in the BLOCK
 LIMITS VPD. This had the unfortunate effect of also limiting the maximum
 size of non-filesystem requests sent to the device through sg/bsg.
 
 Avoid using blk_queue_max_hw_sectors() and set the max_sectors queue
 limit directly.
 
 Also update the comment in blk_limits_max_hw_sectors() to clarify that
 max_hw_sectors defines the limit for the I/O controller only.
 
 Signed-off-by: Martin K. Petersen martin.peter...@oracle.com
 Reported-by: Brian King brk...@linux.vnet.ibm.com
 Tested-by: Brian King brk...@linux.vnet.ibm.com
 Cc: sta...@vger.kernel.org # 3.17+
 ---
  block/blk-settings.c | 4 ++--
  drivers/scsi/sd.c| 6 +++---
  2 files changed, 5 insertions(+), 5 deletions(-)
 
 diff --git a/block/blk-settings.c b/block/blk-settings.c
 index 12600bfffca9..e0057d035200 100644
 --- a/block/blk-settings.c
 +++ b/block/blk-settings.c
 @@ -241,8 +241,8 @@ EXPORT_SYMBOL(blk_queue_bounce_limit);
   * Description:
   *Enables a low level driver to set a hard upper limit,
   *max_hw_sectors, on the size of requests.  max_hw_sectors is set by
 - *the device driver based upon the combined capabilities of I/O
 - *controller and storage device.
 + *the device driver based upon the capabilities of the I/O
 + *controller.
   *
   *max_sectors is a soft limit imposed by the block layer for
   *filesystem type requests.  This value can be overridden on a
 diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
 index 3b2fcb4fada0..a20da8c25b4f 100644
 --- a/drivers/scsi/sd.c
 +++ b/drivers/scsi/sd.c
 @@ -2770,9 +2770,9 @@ static int sd_revalidate_disk(struct gendisk *disk)
   max_xfer = sdkp-max_xfer_blocks;
   max_xfer = ilog2(sdp-sector_size) - 9;
  
 - max_xfer = min_not_zero(queue_max_hw_sectors(sdkp-disk-queue),
 - max_xfer);
 - blk_queue_max_hw_sectors(sdkp-disk-queue, max_xfer);
 + sdkp-disk-queue-limits.max_sectors =
 + min_not_zero(queue_max_hw_sectors(sdkp-disk-queue), max_xfer);
 +
   set_capacity(disk, sdkp-capacity);
   sd_config_write_same(sdkp);
   kfree(buffer);
 
Reviewed-by: Hannes Reinecke h...@suse.com

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                zSeries & Storage
h...@suse.de   +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [dm-devel] [PATCH] dm-mpath: always return reservation conflict

2015-07-16 Thread Christoph Hellwig
On Thu, Jul 16, 2015 at 05:07:03AM +, Christophe Varoqui wrote:
 For reference the opensvc crm does use type 5 pr, and aims for all paths
 registered. It still does not make use of the multipathd pr janitoring
 features, and uses sg_persist directly for pr status and actions.

The type doesn't matter here.  It's important to set the ALL_TG_PT bit
when registering the key.  As dm-mpath opens the underlying devices
exclusively, and doesn't give you a choice which path to send to you're
in a world of pain without that.
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] csiostor: Use list_for_each_safe instead of re-implementing it

2015-07-16 Thread Johannes Thumshirn
Christophe JAILLET christophe.jail...@wanadoo.fr writes:

 Use 'list_for_each_safe' instead of 'list_for_each' + own logic to keep
 safe when a list entry is deleted.
 Delete the now useless 'csio_list_prev' macro.

 Signed-off-by: Christophe JAILLET christophe.jail...@wanadoo.fr
 ---
  drivers/scsi/csiostor/csio_defs.h |  1 -
  drivers/scsi/csiostor/csio_hw.c   | 10 --
  drivers/scsi/csiostor/csio_scsi.c | 10 --
  3 files changed, 8 insertions(+), 13 deletions(-)

 diff --git a/drivers/scsi/csiostor/csio_defs.h 
 b/drivers/scsi/csiostor/csio_defs.h
 index c38017b..4b3557c 100644
 --- a/drivers/scsi/csiostor/csio_defs.h
 +++ b/drivers/scsi/csiostor/csio_defs.h
 @@ -70,7 +70,6 @@ csio_list_deleted(struct list_head *list)
  }
  
  #define csio_list_next(elem) (((struct list_head *)(elem))-next)
 -#define csio_list_prev(elem) (((struct list_head *)(elem))-prev)
  
  /* State machine */
  typedef void (*csio_sm_state_t)(void *, uint32_t);
 diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c
 index 622bdab..61ee6cb 100644
 --- a/drivers/scsi/csiostor/csio_hw.c
 +++ b/drivers/scsi/csiostor/csio_hw.c
 @@ -3643,20 +3643,19 @@ static void
  csio_mgmt_tmo_handler(uintptr_t data)
  {
   struct csio_mgmtm *mgmtm = (struct csio_mgmtm *) data;
 - struct list_head *tmp;
 + struct list_head *tmp, *next;
   struct csio_ioreq *io_req;
  
   csio_dbg(mgmtm-hw, Mgmt timer invoked!\n);
  
   spin_lock_irq(mgmtm-hw-lock);
  
 - list_for_each(tmp, mgmtm-active_q) {
 + list_for_each_safe(tmp, next, mgmtm-active_q) {
   io_req = (struct csio_ioreq *) tmp;
   io_req-tmo -= min_t(uint32_t, io_req-tmo, ECM_MIN_TMO);
  
   if (!io_req-tmo) {
   /* Dequeue the request from retry Q. */
 - tmp = csio_list_prev(tmp);
   list_del_init(io_req-sm.sm_list);
   if (io_req-io_cbfn) {
   /* io_req will be freed by completion handler */
 @@ -3680,7 +3679,7 @@ csio_mgmtm_cleanup(struct csio_mgmtm *mgmtm)
  {
   struct csio_hw *hw = mgmtm-hw;
   struct csio_ioreq *io_req;
 - struct list_head *tmp;
 + struct list_head *tmp, *next;
   uint32_t count;
  
   count = 30;
 @@ -3692,9 +3691,8 @@ csio_mgmtm_cleanup(struct csio_mgmtm *mgmtm)
   }
  
   /* release outstanding req from ACTIVEQ */
 - list_for_each(tmp, mgmtm-active_q) {
 + list_for_each_safe(tmp, next, mgmtm-active_q) {
   io_req = (struct csio_ioreq *) tmp;
 - tmp = csio_list_prev(tmp);
   list_del_init(io_req-sm.sm_list);
   mgmtm-stats.n_active--;
   if (io_req-io_cbfn) {
 diff --git a/drivers/scsi/csiostor/csio_scsi.c 
 b/drivers/scsi/csiostor/csio_scsi.c
 index 2c4562d..2bfb401 100644
 --- a/drivers/scsi/csiostor/csio_scsi.c
 +++ b/drivers/scsi/csiostor/csio_scsi.c
 @@ -2322,7 +2322,7 @@ csio_scsi_alloc_ddp_bufs(struct csio_scsim *scm, struct 
 csio_hw *hw,
int buf_size, int num_buf)
  {
   int n = 0;
 - struct list_head *tmp;
 + struct list_head *tmp, *next;
   struct csio_dma_buf *ddp_desc = NULL;
   uint32_t unit_size = 0;
  
 @@ -2370,9 +2370,8 @@ csio_scsi_alloc_ddp_bufs(struct csio_scsim *scm, struct 
 csio_hw *hw,
   return 0;
  no_mem:
   /* release dma descs back to freelist and free dma memory */
 - list_for_each(tmp, scm-ddp_freelist) {
 + list_for_each_safe(tmp, next, scm-ddp_freelist) {
   ddp_desc = (struct csio_dma_buf *) tmp;
 - tmp = csio_list_prev(tmp);
   pci_free_consistent(hw-pdev, ddp_desc-len, ddp_desc-vaddr,
   ddp_desc-paddr);
   list_del_init(ddp_desc-list);
 @@ -2393,13 +2392,12 @@ no_mem:
  static void
  csio_scsi_free_ddp_bufs(struct csio_scsim *scm, struct csio_hw *hw)
  {
 - struct list_head *tmp;
 + struct list_head *tmp, *next;
   struct csio_dma_buf *ddp_desc;
  
   /* release dma descs back to freelist and free dma memory */
 - list_for_each(tmp, scm-ddp_freelist) {
 + list_for_each_safe(tmp, next, scm-ddp_freelist) {
   ddp_desc = (struct csio_dma_buf *) tmp;
 - tmp = csio_list_prev(tmp);
   pci_free_consistent(hw-pdev, ddp_desc-len, ddp_desc-vaddr,
   ddp_desc-paddr);
   list_del_init(ddp_desc-list);

Reviewed-by: Johannes Thumshirn jthumsh...@suse.de

-- 
Johannes Thumshirn   Storage
jthumsh...@suse.de +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v2] mpt2sas: setpci reset kernel oops fix

2015-07-16 Thread Hannes Reinecke
On 07/14/2015 01:23 PM, Nagarajkumar Narayanan wrote:
 
 Patch Description:
 
 In mpt2sas driver due to lack of synchronization between ioctl,
 BRM status access through sysfs, pci resource removal kernel oops
 happen as ioctl path and BRM status sysfs access path still tries
 to access the removed resources
 
 kernel: BUG: unable to handle kernel paging request at c900171e
 
 Oops:  [#1] SMP
 
 Two locks added to provide synchronization
 
 1. pci_access_mutex: Mutex to synchronize ioctl, sysfs show path and
 pci resource handling. PCI resource freeing will lead to free
 vital hardware/memory resource, which might be in use by cli/sysfs
 path functions resulting in Null pointer reference followed by kernel
 crash. To avoid the above race condition we use mutex synchronization
 which ensures the synchronization between cli/sysfs_show path
 
 2. spinlock on list operations over IOCs
 
 Case: when multiple warpdrive cards (IOCs) are in use
 Each IOC will be added to the ioc list structure on initialization.
 Watchdog threads run at regular intervals to check IOC for any
 fault conditions which will trigger the dead_ioc thread to
 deallocate pci resource, resulting in deleting the IOC entry from list,
 this deletion needs to be protected by spinlock to ensure that
 ioc removal is synchronized, if not synchronized it might lead to
 list_del corruption as the ioc list is traversed in cli path
 
 
 From 8db4d8194276ba420a4e93de4b09df6da5a934e4 Mon Sep 17 00:00:00 2001
 From: Nagarajkumar Narayanan nagarajkumar.naraya...@seagate.com
 Date: Tue, 14 Jul 2015 16:33:56 +0530
 Subject: [PATCH] mpt2sas setpci reset oops fix
 
 setpci reset on nytro warpdrive card along with sysfs access and
 cli ioctl access resulted in kernel oops
 
 1. pci_access_mutex lock added to provide synchronization between IOCTL,
sysfs, PCI resource handling path
 
 2. gioc_lock spinlock to protect list operations over multiple
 controllers
 
 Signed-off-by: Nagarajkumar Narayanan nagarajkumar.naraya...@seagate.com
 ---
 * v2
 - removed is_warpdrive condition for pci_access_mutex lock
 
 * v1
 - using DEFINE_SPINLOCK() to initialize the lock at compile time instead
   of using spin_lock_init
 
  drivers/scsi/mpt2sas/mpt2sas_base.c  |7 +++
  drivers/scsi/mpt2sas/mpt2sas_base.h  |   19 ++-
  drivers/scsi/mpt2sas/mpt2sas_ctl.c   |   33 +
  drivers/scsi/mpt2sas/mpt2sas_scsih.c |   15 ++-
  4 files changed, 68 insertions(+), 6 deletions(-)
 
 diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c 
 b/drivers/scsi/mpt2sas/mpt2sas_base.c
 index 11248de..f04dcc0 100644
 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c
 +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c
 @@ -108,13 +108,17 @@ _scsih_set_fwfault_debug(const char *val, struct 
 kernel_param *kp)
  {
   int ret = param_set_int(val, kp);
   struct MPT2SAS_ADAPTER *ioc;
 + unsigned long flags;
  
   if (ret)
   return ret;
  
 + /* global ioc spinlock to protect controller list on list operations */
   printk(KERN_INFO setting fwfault_debug(%d)\n, mpt2sas_fwfault_debug);
 + spin_lock_irqsave(gioc_lock, flags);
   list_for_each_entry(ioc, mpt2sas_ioc_list, list)
   ioc-fwfault_debug = mpt2sas_fwfault_debug;
 + spin_unlock_irqrestore(gioc_lock, flags);
   return 0;
  }
  
 @@ -4436,6 +4440,8 @@ mpt2sas_base_free_resources(struct MPT2SAS_ADAPTER *ioc)
   __func__));
  
   if (ioc-chip_phys  ioc-chip) {
 + /* synchronizing freeing resource with pci_access_mutex lock */
 + mutex_lock(ioc-pci_access_mutex);
   _base_mask_interrupts(ioc);
   ioc-shost_recovery = 1;
   _base_make_ioc_ready(ioc, CAN_SLEEP, SOFT_RESET);
 @@ -4454,6 +4460,7 @@ mpt2sas_base_free_resources(struct MPT2SAS_ADAPTER *ioc)
   pci_disable_pcie_error_reporting(pdev);
   pci_disable_device(pdev);
   }
 + mutex_unlock(ioc-pci_access_mutex);
   return;
  }
  
Lock imbalance. Please move the call to 'mutex_lock()' out of the
'if' clause.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                zSeries & Storage
h...@suse.de   +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] mpt2sas: Refcount sas_device objects and fix unsafe list usage

2015-07-16 Thread Sreekanth Reddy
On Sun, Jul 12, 2015 at 9:54 AM, Calvin Owens calvinow...@fb.com wrote:
 These objects can be referenced concurrently throughout the driver, we
 need a way to make sure threads can't delete them out from under each
 other. This patch adds the refcount, and refactors the code to use it.

 Additionally, we cannot iterate over the sas_device_list without
 holding the lock, or we risk corrupting random memory if items are
 added or deleted as we iterate. This patch refactors _scsih_probe_sas()
 to use the sas_device_list in a safe way.

 Cc: Christoph Hellwig h...@infradead.org
 Cc: Bart Van Assche bart.vanass...@sandisk.com
 Signed-off-by: Calvin Owens calvinow...@fb.com
 ---
  drivers/scsi/mpt2sas/mpt2sas_base.h  |  22 +-
  drivers/scsi/mpt2sas/mpt2sas_scsih.c | 434 
 ---
  drivers/scsi/mpt2sas/mpt2sas_transport.c |  12 +-
  3 files changed, 315 insertions(+), 153 deletions(-)

 diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h 
 b/drivers/scsi/mpt2sas/mpt2sas_base.h
 index caff8d1..78f41ac 100644
 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h
 +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h
 @@ -238,6 +238,7 @@
   * @flags: MPT_TARGET_FLAGS_XXX flags
   * @deleted: target flaged for deletion
   * @tm_busy: target is busy with TM request.
 + * @sdev: The sas_device associated with this target
   */
  struct MPT2SAS_TARGET {
 struct scsi_target *starget;
 @@ -248,6 +249,7 @@ struct MPT2SAS_TARGET {
 u32 flags;
 u8  deleted;
 u8  tm_busy;
 +   struct _sas_device *sdev;
  };


 @@ -376,8 +378,24 @@ struct _sas_device {
 u8  phy;
 u8  responding;
 u8  pfa_led_on;
 +   struct kref refcount;
  };

 +static inline void sas_device_get(struct _sas_device *s)
 +{
 +   kref_get(s-refcount);
 +}
 +
 +static inline void sas_device_free(struct kref *r)
 +{
 +   kfree(container_of(r, struct _sas_device, refcount));
 +}
 +
 +static inline void sas_device_put(struct _sas_device *s)
 +{
 +   kref_put(s-refcount, sas_device_free);
 +}
 +
  /**
   * struct _raid_device - raid volume link list
   * @list: sas device list
 @@ -1095,7 +1113,9 @@ struct _sas_node 
 *mpt2sas_scsih_expander_find_by_handle(struct MPT2SAS_ADAPTER *
  u16 handle);
  struct _sas_node *mpt2sas_scsih_expander_find_by_sas_address(struct 
 MPT2SAS_ADAPTER
  *ioc, u64 sas_address);
 -struct _sas_device *mpt2sas_scsih_sas_device_find_by_sas_address(
 +struct _sas_device *mpt2sas_get_sdev_by_addr(
 +struct MPT2SAS_ADAPTER *ioc, u64 sas_address);
 +struct _sas_device *__mpt2sas_get_sdev_by_addr(
  struct MPT2SAS_ADAPTER *ioc, u64 sas_address);

  void mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc);
 diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c 
 b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
 index 3f26147..fad80ce 100644
 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
 +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
 @@ -526,8 +526,43 @@ _scsih_determine_boot_device(struct MPT2SAS_ADAPTER *ioc,
 }
  }

 +struct _sas_device *
 +__mpt2sas_get_sdev_from_target(struct MPT2SAS_TARGET *tgt_priv)
 +{
 +   struct _sas_device *ret;
 +
 +   ret = tgt_priv-sdev;
 +   if (ret)
 +   sas_device_get(ret);
 +
 +   return ret;
 +}
 +
 +struct _sas_device *
 +__mpt2sas_get_sdev_by_addr(struct MPT2SAS_ADAPTER *ioc,
 +u64 sas_address)
 +{
 +   struct _sas_device *sas_device;
 +
 +   assert_spin_locked(ioc-sas_device_lock);
 +
 +   list_for_each_entry(sas_device, ioc-sas_device_list, list)
 +   if (sas_device-sas_address == sas_address)
 +   goto found_device;
 +
 +   list_for_each_entry(sas_device, ioc-sas_device_init_list, list)
 +   if (sas_device-sas_address == sas_address)
 +   goto found_device;
 +
 +   return NULL;
 +
 +found_device:
 +   sas_device_get(sas_device);
 +   return sas_device;
 +}
 +
  /**
 - * mpt2sas_scsih_sas_device_find_by_sas_address - sas device search
 + * mpt2sas_get_sdev_by_addr - sas device search
   * @ioc: per adapter object
   * @sas_address: sas address
   * Context: Calling function should acquire ioc-sas_device_lock
 @@ -536,24 +571,44 @@ _scsih_determine_boot_device(struct MPT2SAS_ADAPTER 
 *ioc,
   * object.
   */
  struct _sas_device *
 -mpt2sas_scsih_sas_device_find_by_sas_address(struct MPT2SAS_ADAPTER *ioc,
 +mpt2sas_get_sdev_by_addr(struct MPT2SAS_ADAPTER *ioc,
  u64 sas_address)
  {
 struct _sas_device *sas_device;
 +   unsigned long flags;
 +
 +   spin_lock_irqsave(ioc-sas_device_lock, flags);
 +   sas_device = __mpt2sas_get_sdev_by_addr(ioc,
 +   sas_address);
 +   spin_unlock_irqrestore(ioc-sas_device_lock, flags);
 +
 +   return sas_device;
 +}
 +
 +static struct _sas_device *
 +__mpt2sas_get_sdev_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle)
 +{
 +   struct _sas_device *sas_device;
 +
 +   

Re: [dm-devel] [PATCH] dm-mpath: always return reservation conflict

2015-07-16 Thread Hannes Reinecke
On 07/16/2015 09:54 AM, Christoph Hellwig wrote:
 On Thu, Jul 16, 2015 at 05:07:03AM +, Christophe Varoqui wrote:
 For reference the opensvc crm does use type 5 pr, and aims for all paths
 registered. It still does not make use of the multipathd pr janitoring
 features, and uses sg_persist directly for pr status and actions.
 
 The type doesn't matter here.  It's important to set the ALL_TG_PT bit
 when registering the key.  As dm-mpath opens the underlying devices
 exclusively, and doesn't give you a choice which path to send to you're
 in a world of pain without that.
 
Second that.

I would even put this in the manpage somewhere.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                zSeries & Storage
h...@suse.de   +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] scsi: fix memory leak with scsi-mq

2015-07-16 Thread Tony Battersby
Fix a memory leak with scsi-mq triggered by commands with large data
transfer length.

__sg_alloc_table() sets both table->nents and table->orig_nents to the
same value.  When the scatterlist is DMA-mapped, table->nents is
overwritten with the (possibly smaller) size of the DMA-mapped
scatterlist, while table->orig_nents retains the original size of the
allocated scatterlist.  scsi_free_sgtable() should therefore check
orig_nents instead of nents, and all code that initializes sdb->table
without calling __sg_alloc_table() should set both nents and orig_nents.

Fixes: d285203cf647 ("scsi: add support for a blk-mq based I/O path.")
Cc: sta...@vger.kernel.org # 3.17+
Signed-off-by: Tony Battersby to...@cybernetics.com
---

For immediate inclusion.

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 106884a..cfadcce 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -944,7 +944,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct 
scsi_eh_save *ses,
scmd-sdb.length);
scmd-sdb.table.sgl = ses-sense_sgl;
scmd-sc_data_direction = DMA_FROM_DEVICE;
-   scmd-sdb.table.nents = 1;
+   scmd-sdb.table.nents = scmd-sdb.table.orig_nents = 1;
scmd-cmnd[0] = REQUEST_SENSE;
scmd-cmnd[4] = scmd-sdb.length;
scmd-cmd_len = COMMAND_SIZE(scmd-cmnd[0]);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index b1a2631..448ebda 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -583,7 +583,7 @@ static struct scatterlist *scsi_sg_alloc(unsigned int 
nents, gfp_t gfp_mask)
 
 static void scsi_free_sgtable(struct scsi_data_buffer *sdb, bool mq)
 {
-   if (mq  sdb-table.nents = SCSI_MAX_SG_SEGMENTS)
+   if (mq  sdb-table.orig_nents = SCSI_MAX_SG_SEGMENTS)
return;
__sg_free_table(sdb-table, SCSI_MAX_SG_SEGMENTS, mq, scsi_sg_free);
 }
@@ -597,8 +597,8 @@ static int scsi_alloc_sgtable(struct scsi_data_buffer *sdb, 
int nents, bool mq)
 
if (mq) {
if (nents = SCSI_MAX_SG_SEGMENTS) {
-   sdb-table.nents = nents;
-   sg_init_table(sdb-table.sgl, sdb-table.nents);
+   sdb-table.nents = sdb-table.orig_nents = nents;
+   sg_init_table(sdb-table.sgl, nents);
return 0;
}
first_chunk = sdb-table.sgl;

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] scsi: fix hang in scsi error handling

2015-07-16 Thread Kevin Groeneveld
 -Original Message-
 From: Hannes Reinecke [mailto:h...@suse.de]
 Sent: July-16-15 7:11 AM
  When the hang occurs shost->host_busy == 2 and shost->host_failed == 1
  in the scsi_eh_wakeup function. However this function only wakes the
  error handler if host_busy == host_failed.
 
 Which just means that one command is still outstanding, and we need to wait
 for it to complete.
 But see below...

So the root cause of the hang is maybe that the second command never
completes? Maybe host_failed being non zero is blocking something in the
port multiplier code?

 Hmm.
 I am really not sure about this.

I wasn't sure either, that is one reason why I posted the patch.

 'host_busy' indicates the number of outstanding commands, and
 'host_failed' is the number of commands which have failed (on the ground
 that failed commands are considered outstanding, too).
 
 So the first hunk would change the behaviour from 'start SCSI EH once all
 commands are completed or failed' to 'start SCSI EH for _any_ command if
 scsi_eh_wakeup is called'
 (note that shost_failed might be '0'...).
 Which doesn't sound right.

So could the patch create any problems by starting the EH any time
scsi_eh_wakeup is called? Or is it just inefficient?

 I guess this needs further debugging to get to the bottom of it.

Any suggestions on things I could try?

The fact that the problem goes away when I only enable one CPU core makes
me think there is a race happening somewhere.


Kevin
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] ibmvscsi:Remove no longer required comments about return values in the file ibmvfc.c

2015-07-16 Thread Tyrel Datwyler
On 07/09/2015 10:41 AM, Nicholas Krause wrote:
 This removes the no longer require comments about the return values
 for the functions ibmvfc_init_host and  ibmvfc_reinit_host due to
 these functions being declared to have a return type of void thus
 making this comments invalid.
 
 Signed-off-by: Nicholas Krause xerofo...@gmail.com

Some grammar nit picking. Aside from a couple spelling errors the
wording is a little difficult. Refer to Documentation/SubmittingPatches
section 2. In particular it suggests using the imperative mood.
Something like this would work better:

Remove comments about return values from ibmvfc_init_host() and
ibmvfc_reinit_host() as they are both declared to have return type void.

Otherwise,

Acked-by: Tyrel Datwyler tyr...@linux.vnet.ibm.com

 ---
  drivers/scsi/ibmvscsi/ibmvfc.c | 4 
  1 file changed, 4 deletions(-)
 
 diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
 index 057d277..c6582db 100644
 --- a/drivers/scsi/ibmvscsi/ibmvfc.c
 +++ b/drivers/scsi/ibmvscsi/ibmvfc.c
 @@ -528,8 +528,6 @@ static void ibmvfc_set_host_action(struct ibmvfc_host 
 *vhost,
   * ibmvfc_reinit_host - Re-start host initialization (no NPIV Login)
   * @vhost:   ibmvfc host struct
   *
 - * Return value:
 - *   nothing
   **/
  static void ibmvfc_reinit_host(struct ibmvfc_host *vhost)
  {
 @@ -570,8 +568,6 @@ static void ibmvfc_link_down(struct ibmvfc_host *vhost,
   * ibmvfc_init_host - Start host initialization
   * @vhost:   ibmvfc host struct
   *
 - * Return value:
 - *   nothing
   **/
  static void ibmvfc_init_host(struct ibmvfc_host *vhost)
  {
 

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 3/4] target: Return descriptor format sense data in case the LU spans 64bit sectors

2015-07-16 Thread Martin K. Petersen
 Sagi == Sagi Grimberg sa...@mellanox.com writes:

Sagi In case a LU spans 64bit sectors, fixed size sense data
Sagi information field is only 32 bits which means the sector
Sagi information will be truncated.

Sagi Thus, if the LU spans 64bit sectors, use descriptor format sense
Sagi data to correctly report sector information.

A-OK.

Reviewed-by: Martin K. Petersen martin.peter...@oracle.com

-- 
Martin K. Petersen  Oracle Linux Engineering
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] wd719x: Remove use of macro DEFINE_PCI_DEVICE_TABLE

2015-07-16 Thread Vaishali Thakkar
Macro DEFINE_PCI_DEVICE_TABLE is deprecated. So, here use
struct pci_device_id instead of DEFINE_PCI_DEVICE_TABLE with
the goal of getting rid of this macro completely.

The Coccinelle semantic patch that performs this transformation
is as follows:

@@
identifier a;
declarer name DEFINE_PCI_DEVICE_TABLE;
initializer i;
@@
- DEFINE_PCI_DEVICE_TABLE(a)
+ const struct pci_device_id a[]
= i;

Signed-off-by: Vaishali Thakkar vthakkar1...@gmail.com
---
This patch is against linux-next branch
---
 drivers/scsi/wd719x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/wd719x.c b/drivers/scsi/wd719x.c
index 61346aa..4655d16 100644
--- a/drivers/scsi/wd719x.c
+++ b/drivers/scsi/wd719x.c
@@ -962,7 +962,7 @@ static void wd719x_pci_remove(struct pci_dev *pdev)
scsi_host_put(sh);
 }
 
-static DEFINE_PCI_DEVICE_TABLE(wd719x_pci_table) = {
+static const struct pci_device_id wd719x_pci_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_WD, 0x3296) },
{}
 };
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/1] Using the local variable instead of I/O flag to acquire io_req_lock in fnic_queuecommand() to avoid deadlock

2015-07-16 Thread Martin K. Petersen
 Hiral == Hiral Shah his...@cisco.com writes:

Hiral We added changes in fnic driver patch 1.6.0.16 to acquire
Hiral io_req_lock in fnic_queuecommand() before issuing I/O so that io
Hiral completion is serialized. But when releasing the lock we check
Hiral for the I/O flag and this could be modified if IO abort occurs
Hiral before I/O completion. In this case we won't release the lock, which
Hiral causes a deadlock in some scenarios. Using the local variable to
Hiral check the IO lock status will resolve the problem.

Maybe bool instead of int?

Otherwise OK.

Reviewed-by: Martin K. Petersen martin.peter...@oracle.com

-- 
Martin K. Petersen  Oracle Linux Engineering
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/3] [RESEND] ipr: Fix locking for unit attention handling

2015-07-16 Thread Martin K. Petersen
 Brian == Brian King brk...@linux.vnet.ibm.com writes:

Brian Make sure we have the host lock held when calling
Brian scsi_report_bus_reset. Fixes a crash seen as the __devices list
Brian in the scsi host was changing as we were iterating through it.

Nit: I take it that renaming lock_flags to hrrq_flags makes sense. But
it muddles what is supposed to be a bugfix patch.

Reviewed-by: Martin K. Petersen martin.peter...@oracle.com

-- 
Martin K. Petersen  Oracle Linux Engineering
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] [RESEND] ipr: Fix incorrect trace indexing

2015-07-16 Thread Martin K. Petersen
 Brian == Brian King brk...@linux.vnet.ibm.com writes:

Brian When ipr's internal driver trace was changed to an atomic, a
Brian signed/unsigned bug slipped in which results in us indexing
Brian backwards in our memory buffer writing on memory that does not
Brian belong to us. This patch fixes this by removing the modulo and
Brian instead just mask off the low bits.

Reviewed-by: Martin K. Petersen martin.peter...@oracle.com

-- 
Martin K. Petersen  Oracle Linux Engineering
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] [RESEND] ipr: Fix invalid array indexing for HRRQ

2015-07-16 Thread Martin K. Petersen
 Brian == Brian King brk...@linux.vnet.ibm.com writes:

Brian Fixes another signed / unsigned array indexing bug in the ipr
Brian driver.  Currently, when hrrq_index wraps, it becomes a negative
Brian number. We do the modulo, but still have a negative number, so we
Brian end up indexing backwards in the array. Given where the hrrq
Brian array is located in memory, we probably won't actually reference
Brian memory we don't own, but nonetheless ipr is still looking at data
Brian within struct ipr_ioa_cfg and interpreting it as struct
Brian ipr_hrr_queue data, so bad things could certainly happen.

Reviewed-by: Martin K. Petersen martin.peter...@oracle.com

-- 
Martin K. Petersen  Oracle Linux Engineering
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 3/3] cxlflash: Virtual LUN support

2015-07-16 Thread Matthew R. Ochs
Add support for physical LUN segmentation (virtual LUNs) to device
driver supporting the IBM CXL Flash adapter. This patch allows user
space applications to virtually segment a physical LUN into N virtual
LUNs, taking advantage of the translation features provided by this
adapter.

Signed-off-by: Matthew R. Ochs mro...@linux.vnet.ibm.com
Signed-off-by: Manoj N. Kumar ma...@linux.vnet.ibm.com
---
 drivers/scsi/cxlflash/Makefile |2 +-
 drivers/scsi/cxlflash/common.h |3 +
 drivers/scsi/cxlflash/main.c   |   12 +
 drivers/scsi/cxlflash/sislite.h|3 +
 drivers/scsi/cxlflash/superpipe.c  |   56 ++
 drivers/scsi/cxlflash/superpipe.h  |   50 +-
 drivers/scsi/cxlflash/vlun.c   | 1187 
 drivers/scsi/cxlflash/vlun.h   |   68 +++
 include/uapi/scsi/cxlflash_ioctl.h |   29 +
 9 files changed, 1408 insertions(+), 2 deletions(-)
 create mode 100644 drivers/scsi/cxlflash/vlun.c
 create mode 100644 drivers/scsi/cxlflash/vlun.h

diff --git a/drivers/scsi/cxlflash/Makefile b/drivers/scsi/cxlflash/Makefile
index 3de309c..fac300b 100644
--- a/drivers/scsi/cxlflash/Makefile
+++ b/drivers/scsi/cxlflash/Makefile
@@ -1,2 +1,2 @@
 obj-$(CONFIG_CXLFLASH) += cxlflash.o
-cxlflash-y += main.o superpipe.o
+cxlflash-y += main.o superpipe.o vlun.o
diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
index 3b16857..12bfbac 100644
--- a/drivers/scsi/cxlflash/common.h
+++ b/drivers/scsi/cxlflash/common.h
@@ -114,6 +114,9 @@ struct cxlflash_cfg {
 
atomic_t num_user_contexts;
 
+   /* Parameters that are LUN table related */
+   int last_lun_index[CXLFLASH_NUM_FC_PORTS];
+   int promote_lun_index;
struct list_head lluns; /* list of llun_info structs */
spinlock_t slock;
 
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index b87964b..1ac9a0f 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -848,6 +848,8 @@ static void cxlflash_remove(struct pci_dev *pdev)
!cfg-tmf_active);
spin_unlock_irqrestore(cfg-tmf_waitq.lock, lock_flags);
 
+   cxlflash_stop_term_user_contexts(cfg);
+
switch (cfg-init_state) {
case INIT_STATE_SCSI:
cxlflash_term_luns(cfg);
@@ -2241,6 +2243,16 @@ static int cxlflash_probe(struct pci_dev *pdev,
 
cfg-init_state = INIT_STATE_NONE;
cfg-dev = pdev;
+
+   /* The promoted LUNs move to the top of the LUN table. The rest stay
+* on the bottom half. The bottom half grows from the end
+* (index = 255), whereas the top half grows from the beginning
+* (index = 0).
+*/
+   cfg-promote_lun_index  = 0;
+   cfg-last_lun_index[0] = CXLFLASH_NUM_VLUNS/2 - 1;
+   cfg-last_lun_index[1] = CXLFLASH_NUM_VLUNS/2 - 1;
+
cfg-dev_id = (struct pci_device_id *)dev_id;
cfg-eeh_active = EEH_STATE_NONE;
 
diff --git a/drivers/scsi/cxlflash/sislite.h b/drivers/scsi/cxlflash/sislite.h
index bf5d399..e599e7f 100644
--- a/drivers/scsi/cxlflash/sislite.h
+++ b/drivers/scsi/cxlflash/sislite.h
@@ -462,4 +462,7 @@ struct sisl_rht_entry_f1 {
 #define TMF_LUN_RESET  0x1U
 #define TMF_CLEAR_ACA  0x2U
 
+
+#define SISLITE_MAX_WS_BLOCKS 512
+
 #endif /* _SISLITE_H */
diff --git a/drivers/scsi/cxlflash/superpipe.c 
b/drivers/scsi/cxlflash/superpipe.c
index 631cc6b..97affc5 100644
--- a/drivers/scsi/cxlflash/superpipe.c
+++ b/drivers/scsi/cxlflash/superpipe.c
@@ -26,11 +26,25 @@
 
 #include sislite.h
 #include common.h
+#include vlun.h
 #include superpipe.h
 
 static struct cxlflash_global global;
 
 /**
+ * marshal_rele_to_resize() - translate release to resize structure
+ * @rele:  Source structure from which to translate/copy.
+ * @resize:Destination structure for the translate/copy.
+ */
+static void marshal_rele_to_resize(struct dk_cxlflash_release *release,
+  struct dk_cxlflash_resize *resize)
+{
+   resize-hdr = release-hdr;
+   resize-context_id = release-context_id;
+   resize-rsrc_handle = release-rsrc_handle;
+}
+
+/**
  * marshal_det_to_rele() - translate detach to release structure
  * @detach:Destination structure for the translate/copy.
  * @rele:  Source structure from which to translate/copy.
@@ -237,6 +251,7 @@ void cxlflash_list_terminate(void)
spin_lock_irqsave(global.slock, flags);
list_for_each_entry_safe(gli, temp, global.gluns, list) {
list_del(gli-list);
+   cxlflash_ba_terminate(gli-blka.ba_lun);
kfree(gli);
}
 
@@ -649,6 +664,9 @@ void cxlflash_lun_detach(struct glun_info *gli)
  * @ctxi:  Context owning resources.
  * @release:   Release ioctl data structure.
  *
+ * For LUN's in virtual mode, the virtual lun associated with the specified
+ * resource handle is resized to 0 prior to releasing the RHTE.
+ *
  * Return: 0 on success, -errno on failure
  

[PATCH v2 2/3] cxlflash: Superpipe support

2015-07-16 Thread Matthew R. Ochs
Add superpipe supporting infrastructure to device driver for the IBM CXL
Flash adapter. This patch allows userspace applications to take advantage
of the accelerated I/O features that this adapter provides and bypass the
traditional filesystem stack.

Signed-off-by: Matthew R. Ochs mro...@linux.vnet.ibm.com
Signed-off-by: Manoj N. Kumar ma...@linux.vnet.ibm.com
---
 Documentation/powerpc/cxlflash.txt |  298 ++
 drivers/scsi/cxlflash/Makefile |2 +-
 drivers/scsi/cxlflash/common.h |   17 +
 drivers/scsi/cxlflash/main.c   |   23 +-
 drivers/scsi/cxlflash/superpipe.c  | 2076 
 drivers/scsi/cxlflash/superpipe.h  |  142 +++
 include/uapi/scsi/Kbuild   |1 +
 include/uapi/scsi/cxlflash_ioctl.h |  132 +++
 8 files changed, 2689 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/powerpc/cxlflash.txt
 create mode 100644 drivers/scsi/cxlflash/superpipe.c
 create mode 100644 drivers/scsi/cxlflash/superpipe.h
 create mode 100644 include/uapi/scsi/cxlflash_ioctl.h

diff --git a/Documentation/powerpc/cxlflash.txt 
b/Documentation/powerpc/cxlflash.txt
new file mode 100644
index 000..c4d3849
--- /dev/null
+++ b/Documentation/powerpc/cxlflash.txt
@@ -0,0 +1,298 @@
+Introduction
+
+
+The IBM Power architecture provides support for CAPI (Coherent
+Accelerator Power Interface), which is available to certain PCIe slots
+on Power 8 systems. CAPI can be thought of as a special tunneling
+protocol through PCIe that allows PCIe adapters to look like special
+purpose co-processors which can read or write an application's
+memory and generate page faults. As a result, the host interface to
+an adapter running in CAPI mode does not require the data buffers to
+be mapped to the device's memory (IOMMU bypass) nor does it require
+memory to be pinned.
+
+On Linux, Coherent Accelerator (CXL) kernel services present CAPI
+devices as a PCI device by implementing a virtual PCI host bridge.
+This abstraction simplifies the infrastructure and programming
+model, allowing for drivers to look similar to other native PCI
+device drivers.
+
+CXL provides a mechanism by which user space applications can
+directly talk to a device (network or storage) bypassing the typical
+kernel/device driver stack. The CXL Flash Adapter Driver enables a
+user space application direct access to Flash storage.
+
+The CXL Flash Adapter Driver is a kernel module that sits in the
+SCSI stack as a low level device driver (below the SCSI disk and
+protocol drivers) for the IBM CXL Flash Adapter. This driver is
+responsible for the initialization of the adapter, setting up the
+special path for user space access, and performing error recovery. It
+communicates directly with the Flash Accelerator Functional Unit (AFU)
+as described in Documentation/powerpc/cxl.txt.
+
+The cxlflash driver supports two, mutually exclusive, modes of
+operation at the device (LUN) level:
+
+- Any flash device (LUN) can be configured to be accessed as a
+  regular disk device (i.e.: /dev/sdc). This is the default mode.
+
+- Any flash device (LUN) can be configured to be accessed from
+  user space with a special block library. This mode further
+  specifies the means of accessing the device and provides for
+  either raw access to the entire LUN (referred to as direct
+  or physical LUN access) or access to a kernel/AFU-mediated
+  partition of the LUN (referred to as virtual LUN access). The
+  segmentation of a disk device into virtual LUNs is assisted
+  by special translation services provided by the Flash AFU.
+
+Overview
+
+
+The Coherent Accelerator Interface Architecture (CAIA) introduces a
+concept of a master context. A master typically has special privileges
+granted to it by the kernel or hypervisor allowing it to perform AFU
+wide management and control. The master may or may not be involved
+directly in each user I/O, but at the minimum is involved in the
+initial setup before the user application is allowed to send requests
+directly to the AFU.
+
+The CXL Flash Adapter Driver establishes a master context with the
+AFU. It uses memory mapped I/O (MMIO) for this control and setup. The
+Adapter Problem Space Memory Map looks like this:
+
+ +---+
+ |512 * 64 KB User MMIO  |
+ |(per context)  |
+ |   User Accessible |
+ +---+
+ |512 * 128 B per context|
+ |Provisioning and Control   |
+ |   Trusted Process accessible  |
+ +---+
+ |  

[PATCH v2 0/3] IBM CXL Flash Error Recovery and Superpipe

2015-07-16 Thread Matthew R. Ochs
This patch set is intended for the 4.3 release and adds support for
error recovery and the superpipe features provided by the IBM CXL
Flash adapter. The superpipe function was originally presented in an
RFC patch set in late April. To aid with the review of the superpipe
portion of these enhancements, we have further split it across the
last two patches in this set. Please reference the changelog below
for details on what has been altered from previous versions of this
patchset.

The IBM Power processor architecture provides support for CAPI (Coherent
Accelerator Power Interface), which is available to certain PCIe slots
on Power 8 systems. CAPI can be thought of as a special tunneling
protocol through PCIe that allows PCIe adapters to look like special
purpose co-processors which can read or write an application's memory
and generate page faults. As a result, the host interface to an adapter
running in CAPI mode does not require data buffers to be mapped to the
device's memory (IOMMU bypass) nor does it require memory to be pinned.

Application specific accelerators are constructed by burning logic
to either an FPGA or ASIC that accelerates a certain function in
hardware. This logic is referred to as an Accelerator Function Unit
or AFU. AFUs and their associated software are designed to leverage the
benefits that CAPI provides to reduce the burden on CPUs and achieve
higher performance. Examples of AFUs include compression, encryption,
sorting, etc.

The cxlflash adapter contains an AFU that enhances the performance of
accessing an external flash storage device by allowing user space
applications to establish a 'superpipe' through which they may directly
access the storage, bypassing the traditional storage stack and reducing
path length per-I/O by more than an order of magnitude. The AFU also
supports a translation function that allows users to segment a physical
device into 'n' virtual devices [by programmatic means] and refer to these
segments as if they were a true physical device. This function enables
a more efficient use of a physical device and provides for a secure
multi-tenant environment.

At a high-level, the cxlflash adapter looks and behaves very much like
a SCSI HBA. Like other SCSI adapters it understands SCSI CDBs and LUN
discovery. It also provides health monitoring, error recovery, and link
event reporting.

At a lower level, the cxlflash adapter requires some additional items not
found in a traditional SCSI HBA driver. These include the following:

- A programmatic API (implemented as ioctls) that user applications
interact with when they desire to take advantage of the superpipe access
from user space. These ioctls allow the user to gain access to the CAPI
resources (i.e., interrupts, MMIO space, etc.) that are required to use
the superpipe. Additionally, they allow applications to use the AFU's
virtual partitioning function. Note that while the ioctls are new, under
the covers they make use of existing functionality found in the cxl
driver (drivers/misc/cxl).

- A block allocation table (implemented as a bitmap) per physical
device attached to the cxlflash adapter that is operating in the virtual
partitioned mode. This table manages the segmentation of the physical
device and is used to derive the entries found in the LUN mapping table.

- A LUN mapping table that is shared with the AFU and used by the AFU
to associate the resource handles referring to a specific virtual device
with blocks on the physical device.

- The ability to send a limited set of SCSI commands directly to the
adapter to determine capacity and identification data as well as wipe
blocks that are no longer in use when a virtual device is released. This
set of commands includes READ_CAPACITY and WRITE_SAME.

Accompanying this adapter driver but not included here is a user space
library (known as the block library) that will hide the interaction
between user space and the cxlflash driver. Most (if not all) users will
choose to use this library when developing superpipe-aware applications.

The block library can be found on Github:

  https://github.com/mikehollinger/ibmcapikv

More technical details can be found in Documentation/powerpc/cxlflash.txt

The following patches are bisectable:

Patch 1 contains base enablement of error recovery function.

Patch 2 contains base enablement of superpipe function.

Patch 3 adds support for segmentation of physical LUNs from user space.

v2 Changes:
- Introduce support for enhanced I/O error handling
- Incorporate review comments from Mikey Neuling
- Incorporate review comments from Brian King
- Update/add serialization strategy for contexts
- Lock down LUN lists and global struct
- Make items that can be static, static
- Remove duplicate statements
- Make shared counters atomic
- Use correct memory barrier for LWSYNC
- Add cxlflash_ioctl.h to UAPI Kbuild
- Remove unnecessary initialization code
- Fix spelling/grammar errors
- Remove the slave_* routines
- Tone down branch hints
- 

[PATCH v2 1/3] cxlflash: Base error recovery support

2015-07-16 Thread Matthew R. Ochs
Introduce support for enhanced I/O error handling.

Signed-off-by: Matthew R. Ochs mro...@linux.vnet.ibm.com
Signed-off-by: Manoj N. Kumar ma...@linux.vnet.ibm.com
---
 drivers/scsi/cxlflash/common.h |  11 +++-
 drivers/scsi/cxlflash/main.c   | 135 ++---
 2 files changed, 133 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
index fe86bfe..155c2f7 100644
--- a/drivers/scsi/cxlflash/common.h
+++ b/drivers/scsi/cxlflash/common.h
@@ -76,6 +76,12 @@ enum cxlflash_init_state {
INIT_STATE_SCSI
 };
 
+enum eeh_state {
+   EEH_STATE_NONE,
+   EEH_STATE_ACTIVE,
+   EEH_STATE_FAILED
+};
+
 /*
  * Each context has its own set of resource handles that is visible
  * only from that context.
@@ -91,8 +97,6 @@ struct cxlflash_cfg {
 
ulong cxlflash_regs_pci;
 
-   wait_queue_head_t eeh_waitq;
-
struct work_struct work_q;
enum cxlflash_init_state init_state;
enum cxlflash_lr_state lr_state;
@@ -105,7 +109,8 @@ struct cxlflash_cfg {
 
wait_queue_head_t tmf_waitq;
bool tmf_active;
-   u8 err_recovery_active:1;
+   wait_queue_head_t eeh_waitq;
+   enum eeh_state eeh_active;
 };
 
 struct afu_cmd {
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index 76a7286..e1f93ef 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -455,9 +455,18 @@ static int cxlflash_eh_device_reset_handler(struct 
scsi_cmnd *scp)
 get_unaligned_be32(((u32 *)scp-cmnd)[2]),
 get_unaligned_be32(((u32 *)scp-cmnd)[3]));
 
-   rcr = send_tmf(afu, scp, TMF_LUN_RESET);
-   if (unlikely(rcr))
-   rc = FAILED;
+   switch (cfg-eeh_active) {
+   case EEH_STATE_NONE:
+   rcr = send_tmf(afu, scp, TMF_LUN_RESET);
+   if (unlikely(rcr))
+   rc = FAILED;
+   break;
+   case EEH_STATE_ACTIVE:
+   wait_event(cfg-eeh_waitq, cfg-eeh_active != EEH_STATE_ACTIVE);
+   break;
+   case EEH_STATE_FAILED:
+   break;
+   }
 
pr_debug(%s: returning rc=%d\n, __func__, rc);
return rc;
@@ -487,11 +496,20 @@ static int cxlflash_eh_host_reset_handler(struct 
scsi_cmnd *scp)
 get_unaligned_be32(((u32 *)scp-cmnd)[2]),
 get_unaligned_be32(((u32 *)scp-cmnd)[3]));
 
-   rcr = cxlflash_afu_reset(cfg);
-   if (rcr == 0)
-   rc = SUCCESS;
-   else
-   rc = FAILED;
+   switch (cfg-eeh_active) {
+   case EEH_STATE_NONE:
+   rcr = cxlflash_afu_reset(cfg);
+   if (rcr == 0)
+   rc = SUCCESS;
+   else
+   rc = FAILED;
+   break;
+   case EEH_STATE_ACTIVE:
+   wait_event(cfg-eeh_waitq, cfg-eeh_active != EEH_STATE_ACTIVE);
+   break;
+   case EEH_STATE_FAILED:
+   break;
+   }
 
pr_debug(%s: returning rc=%d\n, __func__, rc);
return rc;
@@ -1879,6 +1897,8 @@ static int init_afu(struct cxlflash_cfg *cfg)
struct afu *afu = cfg-afu;
struct device *dev = cfg-dev-dev;
 
+   cxl_perst_reloads_same_image(cfg-cxl_afu, true);
+
rc = init_mc(cfg);
if (rc) {
dev_err(dev, %s: call to init_mc failed, rc=%d!\n,
@@ -2021,6 +2041,12 @@ void cxlflash_wait_resp(struct afu *afu, struct afu_cmd 
*cmd)
  * the sync. This design point requires calling threads to not be on interrupt
  * context due to the possibility of sleeping during concurrent sync 
operations.
  *
+ * AFU sync operations should be gated during EEH recovery. When a recovery
+ * fails and an adapter is to be removed, sync requests can occur as part of
+ * cleaning up resources associated with an adapter prior to its removal. In
+ * this scenario, these requests are identified here and simply ignored (safe
+ * due to the AFU going away).
+ *
  * Return:
  * 0 on success
  * -1 on failure
@@ -2028,11 +2054,17 @@ void cxlflash_wait_resp(struct afu *afu, struct afu_cmd 
*cmd)
 int cxlflash_afu_sync(struct afu *afu, ctx_hndl_t ctx_hndl_u,
  res_hndl_t res_hndl_u, u8 mode)
 {
+   struct cxlflash_cfg *cfg = afu-parent;
struct afu_cmd *cmd = NULL;
int rc = 0;
int retry_cnt = 0;
static DEFINE_MUTEX(sync_active);
 
+   if (cfg-eeh_active == EEH_STATE_FAILED) {
+   pr_debug(%s: Sync not required due to EEH state!\n, __func__);
+   return 0;
+   }
+
mutex_lock(sync_active);
 retry:
cmd = cxlflash_cmd_checkout(afu);
@@ -2122,6 +2154,11 @@ static void cxlflash_worker_thread(struct work_struct 
*work)
int port;
ulong lock_flags;
 
+   /* Avoid MMIO if the device has failed */
+
+   if (cfg-eeh_active == EEH_STATE_FAILED)
+   return;
+