Re: [PULL] vhost: infrastructure changes for 3.16

2014-06-12 Thread Romain Francoise
Michael S. Tsirkin m...@redhat.com writes:

 Memory allocation for vhost-net now supports fallback on vmalloc (same
 as for vhost-scsi) this makes it possible to create the device on
 systems where memory is very fragmented, with slightly lower
 performance.

Thanks Michael, I'm glad to see that this change made its way into
mainline after all! Would you be willing to take the following on top?


From: Romain Francoise rom...@orebokech.com
Date: Thu, 12 Jun 2014 10:26:40 +0200
Subject: [PATCH] vhost-net: don't open-code kvfree

Commit 23cc5a991c (vhost-net: extend device allocation to vmalloc)
added another open-coded version of kvfree (which is available since
v3.15-rc5), nuke it.

Signed-off-by: Romain Francoise rom...@orebokech.com
---
 drivers/vhost/net.c | 12 ++--
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 971a760..8dae2f7 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -700,14 +700,6 @@ static void handle_rx_net(struct vhost_work *work)
handle_rx(net);
 }
 
-static void vhost_net_free(void *addr)
-{
-   if (is_vmalloc_addr(addr))
-   vfree(addr);
-   else
-   kfree(addr);
-}
-
 static int vhost_net_open(struct inode *inode, struct file *f)
 {
struct vhost_net *n;
@@ -723,7 +715,7 @@ static int vhost_net_open(struct inode *inode, struct file 
*f)
}
vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);
if (!vqs) {
-   vhost_net_free(n);
+   kvfree(n);
return -ENOMEM;
}
 
@@ -840,7 +832,7 @@ static int vhost_net_release(struct inode *inode, struct 
file *f)
 * since jobs can re-queue themselves. */
vhost_net_flush(n);
kfree(n-dev.vqs);
-   vhost_net_free(n);
+   kvfree(n);
return 0;
 }
 
-- 
2.0.0

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v1 2/2] ata: Fix the dma state machine lockup for the PIO mode commands.

2014-06-12 Thread Suman Tripathi
This patch fixes the dma state machine lockup due to the PIO mode
commands. The controller is unable to clear the BSY bit after
receiving the PIO setup FIS, which causes the dma state machine
to go into the CMFatalErrorUpdate state, resulting in a dma
state machine lockup.

Signed-off-by: Loc Ho l...@apm.com
Signed-off-by: Suman Tripathi stripa...@apm.com
---
 drivers/ata/ahci.h   |  2 ++
 drivers/ata/ahci_xgene.c | 17 +++--
 drivers/ata/libahci.c| 33 +
 3 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 3c1760e..1f911d1 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -237,6 +237,8 @@ enum {
error-handling stage) */
AHCI_HFLAG_MULTI_MSI= (1  16), /* multiple PCI MSIs */
AHCI_HFLAG_NO_DEVSLP= (1  17), /* no device sleep */
+   AHCI_HFLAG_BROKEN_PIO_CMD   = (1  18), /* PIO cmds resulting in
+   HBA dma state lockup */

/* ap-flags bits */

diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
index 77c89bf..87e87a9 100644
--- a/drivers/ata/ahci_xgene.c
+++ b/drivers/ata/ahci_xgene.c
@@ -104,14 +104,12 @@ static int xgene_ahci_init_memram(struct 
xgene_ahci_context *ctx)
  * @id: data buffer
  *
  * This custom read ID function is required due to the fact that the HW
- * does not support DEVSLP and the controller state machine may get stuck
- * after processing the ID query command.
+ * does not support DEVSLP.
  */
 static unsigned int xgene_ahci_read_id(struct ata_device *dev,
   struct ata_taskfile *tf, u16 *id)
 {
u32 err_mask;
-   void __iomem *port_mmio = ahci_port_base(dev-link-ap);

err_mask = ata_do_dev_read_id(dev, tf, id);
if (err_mask)
@@ -133,16 +131,6 @@ static unsigned int xgene_ahci_read_id(struct ata_device 
*dev,
 */
id[ATA_ID_FEATURE_SUPP] = ~(1  8);

-   /*
-* Due to HW errata, restart the port if no other command active.
-* Otherwise the controller may get stuck.
-*/
-   if (!readl(port_mmio + PORT_CMD_ISSUE)) {
-   writel(PORT_CMD_FIS_RX, port_mmio + PORT_CMD);
-   readl(port_mmio + PORT_CMD);/* Force a barrier */
-   writel(PORT_CMD_FIS_RX | PORT_CMD_START, port_mmio + PORT_CMD);
-   readl(port_mmio + PORT_CMD);/* Force a barrier */
-   }
return 0;
 }

@@ -303,7 +291,8 @@ static struct ata_port_operations xgene_ahci_ops = {
 };

 static const struct ata_port_info xgene_ahci_port_info = {
-   AHCI_HFLAGS(AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ),
+   AHCI_HFLAGS(AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ |
+   AHCI_HFLAG_BROKEN_PIO_CMD),
.flags = AHCI_FLAG_COMMON | ATA_FLAG_NCQ,
.pio_mask = ATA_PIO4,
.udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 3358ce9..06d26b0 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -1592,6 +1592,31 @@ static void ahci_fbs_dec_intr(struct ata_port *ap)
dev_err(ap-host-dev, failed to clear device error\n);
 }

+int ahci_complete_pio_cmd(struct ata_port *ap, u32 cmd_done)
+{
+   struct ata_queued_cmd *qc;
+
+   while (cmd_done) {
+   unsigned int tag = __ffs(cmd_done);
+
+   qc = ata_qc_from_tag(ap, tag);
+   if (qc) {
+   /*
+* Some controller unable to clear the BSY bit after
+* receiving the PIO Setup FIS from device resulting
+* the DMA state to go into CMFatalErrorUpdate state.
+* So need to restart the dma engine to get the
+* controller out of this state.
+*/
+   if (qc-tf.protocol == ATA_PROT_PIO)
+   ahci_restart_engine(ap);
+   }
+   cmd_done = ~(1  tag);
+   }
+
+   return 0;
+}
+
 static void ahci_error_intr(struct ata_port *ap, u32 irq_stat)
 {
struct ahci_host_priv *hpriv = ap-host-private_data;
@@ -1778,6 +1803,14 @@ static void ahci_handle_port_interrupt(struct ata_port 
*ap,
}


+   /* Due to HW errata, some controllers need special handling
+* of the completion of the PIO commands.
+*/
+   if (hpriv-flags  AHCI_HFLAG_BROKEN_PIO_CMD) {
+   u32 cmd_done = ap-qc_active ^ qc_active;
+   ahci_complete_pio_cmd(ap, cmd_done);
+   }
+
rc = ata_qc_complete_multiple(ap, qc_active);

/* while resetting, invalid completions are expected */
--
1.8.2.1

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  

[PATCH v1 1/2] libahci: Implement the function ahci_restart_engine to restart the port dma engine.

2014-06-12 Thread Suman Tripathi
This patch implements the ahci_restart_engine function to
restart the port dma engine.

Signed-off-by: Loc Ho l...@apm.com
Signed-off-by: Suman Tripathi stripa...@apm.com
---
 drivers/ata/ahci.h|  1 +
 drivers/ata/libahci.c | 10 ++
 2 files changed, 11 insertions(+)

diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index af63c75..3c1760e 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -372,6 +372,7 @@ int ahci_do_softreset(struct ata_link *link, unsigned int 
*class,

 int ahci_stop_engine(struct ata_port *ap);
 void ahci_start_engine(struct ata_port *ap);
+int ahci_restart_engine(struct ata_port *ap);
 int ahci_check_ready(struct ata_link *link);
 int ahci_kick_engine(struct ata_port *ap);
 int ahci_port_resume(struct ata_port *ap);
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index b986145..3358ce9 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -742,6 +742,16 @@ static int ahci_set_lpm(struct ata_link *link, enum 
ata_lpm_policy policy,
return 0;
 }

+int ahci_restart_engine(struct ata_port *ap)
+{
+   ahci_stop_engine(ap);
+   ahci_start_fis_rx(ap);
+   ahci_start_engine(ap);
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(ahci_restart_engine);
+
 #ifdef CONFIG_PM
 static void ahci_power_down(struct ata_port *ap)
 {
--
1.8.2.1

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v1 0/2]ata: Fix the dma state machine lockup for APM X-Gene SoC

2014-06-12 Thread Suman Tripathi
Suman Tripathi (2):
  libahci: Implement the function ahci_restart_engine to restart the
port dma engine.
  ata: Fix the dma state machine lockup for the PIO mode commands.

 drivers/ata/ahci.h   |  3 +++
 drivers/ata/ahci_xgene.c | 17 +++--
 drivers/ata/libahci.c| 43 +++
 3 files changed, 49 insertions(+), 14 deletions(-)

--
1.8.2.1

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


scsi-mq

2014-06-12 Thread Christoph Hellwig
With all the required blk-mq work, and the previous set of scsi midlayer
updates in Linus' tree this is the time for the first formal scsi-mq
submission.

At this point the code is ready for merging and use by developers and early
adopters.  The core blk-mq code isn't that suitable for slow devices
yet, mostly due to the lack of an I/O scheduler, but Jens is working on it.
Similarly there is no dm-multipath support for drivers using blk-mq yet,
but I'm working on it.  It should also be noted that the code doesn't
actually support multiple hardware queues or fine grained tuning of the
blk-mq parameters yet.  All these could be added fairly easily as soon
as low-level drivers want to make use of them.

The changes to the existing code are fairly small, and mostly
speedups or cleanups that also apply to the old path as well.  Because
of this I also haven't bothered to put it under a config option, just
like the blk-mq core.

The usage of blk-mq dramatically decreases CPU usage under all workloads going
down from 100% CPU usage that the old setup can hit easily to usually less
than 20% for maxing out storage subsystems with 512byte reads and writes,
and it allows easily achieving millions of IOPS.  Bart and Robert have
helped with some very detailed measurements that they might be able to send
in reply to this, although these usually involve significantly reworked low
level drivers to avoid other bottlenecks.

One major objection to previous iterations of this code was the simple
replacement of the host_lock with atomic counters for the host and busy
counters.  The host_lock avoidance on its own already improves performance,
and with the patch to avoid maintaining the per-target busy counter unless
needed we now replace a lock round trip on the host_lock with just a single
atomic increment in the submission path, and a single atomic decrement in
completion path, which should provide benefits even for the oddest RISC
architecture.  Longer term I'd still love to get rid of these entirely
and use the counters in blk-mq, but due to the difference in how they
are maintained this doesn't seem feasible as long as we still need to
support the legacy request code path.

In addition to the patches in this thread there also is a git available at:

git://git.infradead.org/users/hch/scsi.git scsi-mq

This work was sponsored by the ION division of Fusion IO.

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 14/14] fnic: reject device resets without assigned tags for the blk-mq case

2014-06-12 Thread Christoph Hellwig
Currently the midlayer fakes up a struct request for the explicit reset
ioctls, and those don't have a tag allocated to them.  The fnic driver pokes
into midlayer structures to paper over this design issue, but that won't
work for the blk-mq case.

Either someone who can actually test the hardware will have to come up with
a similar hack for the blk-mq case, or we'll have to bite the bullet and fix
the way the EH ioctls work for real, but until that happens we fail these
explicit requests here.

Signed-off-by: Christoph Hellwig h...@lst.de
Cc: Hiral Patel hiral...@cisco.com
Cc: Suma Ramars sram...@cisco.com
Cc: Brian Uchino buch...@cisco.com
---
 drivers/scsi/fnic/fnic_scsi.c |   16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index ea28b5c..f5dd7e0 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -2224,6 +2224,22 @@ int fnic_device_reset(struct scsi_cmnd *sc)
 
tag = sc-request-tag;
if (unlikely(tag  0)) {
+   /*
+* XXX(hch): current the midlayer fakes up a struct
+* request for the explicit reset ioctls, and those
+* don't have a tag allocated to them.  The below
+* code pokes into midlayer structures to paper over
+* this design issue, but that won't work for blk-mq.
+*
+* Either someone who can actually test the hardware
+* will have to come up with a similar hack for the
+* blk-mq case, or we'll have to bite the bullet and
+* fix the way the EH ioctls work for real, but until
+* that happens we fail these explicit requests here.
+*/
+   if (shost_use_blk_mq(sc-device-host))
+   goto fnic_device_reset_end;
+
tag = fnic_scsi_host_start_tag(fnic, sc);
if (unlikely(tag == SCSI_NO_TAG))
goto fnic_device_reset_end;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/14] scatterlist: allow chaining to preallocated chunks

2014-06-12 Thread Christoph Hellwig
Blk-mq drivers usually preallocate their S/G list as part of the request,
but if we want to support the very large S/G lists currently supported by
the SCSI code that would tie up a lot of memory in the preallocated request
pool.  Add support to the scatterlist code so that it can initialize a
S/G list that uses a preallocated first chunk and dynamically allocated
additional chunks.  That way the scsi-mq code can preallocate a first
page worth of S/G entries as part of the request, and dynamically extend
the S/G list when needed.

Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/scsi/scsi_lib.c |   16 +++-
 include/linux/scatterlist.h |6 +++---
 lib/scatterlist.c   |   24 
 3 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index e438726..32fbae4 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -567,6 +567,11 @@ static struct scatterlist *scsi_sg_alloc(unsigned int 
nents, gfp_t gfp_mask)
return mempool_alloc(sgp-pool, gfp_mask);
 }
 
+static void scsi_free_sgtable(struct scsi_data_buffer *sdb)
+{
+   __sg_free_table(sdb-table, SCSI_MAX_SG_SEGMENTS, false, scsi_sg_free);
+}
+
 static int scsi_alloc_sgtable(struct scsi_data_buffer *sdb, int nents,
  gfp_t gfp_mask)
 {
@@ -575,19 +580,12 @@ static int scsi_alloc_sgtable(struct scsi_data_buffer 
*sdb, int nents,
BUG_ON(!nents);
 
ret = __sg_alloc_table(sdb-table, nents, SCSI_MAX_SG_SEGMENTS,
-  gfp_mask, scsi_sg_alloc);
+  NULL, gfp_mask, scsi_sg_alloc);
if (unlikely(ret))
-   __sg_free_table(sdb-table, SCSI_MAX_SG_SEGMENTS,
-   scsi_sg_free);
-
+   scsi_free_sgtable(sdb);
return ret;
 }
 
-static void scsi_free_sgtable(struct scsi_data_buffer *sdb)
-{
-   __sg_free_table(sdb-table, SCSI_MAX_SG_SEGMENTS, scsi_sg_free);
-}
-
 /*
  * Function:scsi_release_buffers()
  *
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index a964f72..f4ec8bb 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -229,10 +229,10 @@ void sg_init_one(struct scatterlist *, const void *, 
unsigned int);
 typedef struct scatterlist *(sg_alloc_fn)(unsigned int, gfp_t);
 typedef void (sg_free_fn)(struct scatterlist *, unsigned int);
 
-void __sg_free_table(struct sg_table *, unsigned int, sg_free_fn *);
+void __sg_free_table(struct sg_table *, unsigned int, bool, sg_free_fn *);
 void sg_free_table(struct sg_table *);
-int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, gfp_t,
-sg_alloc_fn *);
+int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int,
+struct scatterlist *, gfp_t, sg_alloc_fn *);
 int sg_alloc_table(struct sg_table *, unsigned int, gfp_t);
 int sg_alloc_table_from_pages(struct sg_table *sgt,
struct page **pages, unsigned int n_pages,
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 3a8e8e8..48c15d2 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -165,6 +165,7 @@ static void sg_kfree(struct scatterlist *sg, unsigned int 
nents)
  * __sg_free_table - Free a previously mapped sg table
  * @table: The sg table header to use
  * @max_ents:  The maximum number of entries per single scatterlist
+ * @skip_first_chunk: don't free the (preallocated) first scatterlist chunk
  * @free_fn:   Free function
  *
  *  Description:
@@ -174,7 +175,7 @@ static void sg_kfree(struct scatterlist *sg, unsigned int 
nents)
  *
  **/
 void __sg_free_table(struct sg_table *table, unsigned int max_ents,
-sg_free_fn *free_fn)
+bool skip_first_chunk, sg_free_fn *free_fn)
 {
struct scatterlist *sgl, *next;
 
@@ -202,7 +203,9 @@ void __sg_free_table(struct sg_table *table, unsigned int 
max_ents,
}
 
table-orig_nents -= sg_size;
-   free_fn(sgl, alloc_size);
+   if (!skip_first_chunk)
+   free_fn(sgl, alloc_size);
+   skip_first_chunk = false;
sgl = next;
}
 
@@ -217,7 +220,7 @@ EXPORT_SYMBOL(__sg_free_table);
  **/
 void sg_free_table(struct sg_table *table)
 {
-   __sg_free_table(table, SG_MAX_SINGLE_ALLOC, sg_kfree);
+   __sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree);
 }
 EXPORT_SYMBOL(sg_free_table);
 
@@ -241,8 +244,8 @@ EXPORT_SYMBOL(sg_free_table);
  *
  **/
 int __sg_alloc_table(struct sg_table *table, unsigned int nents,
-unsigned int max_ents, gfp_t gfp_mask,
-sg_alloc_fn *alloc_fn)
+unsigned int max_ents, struct scatterlist *first_chunk,
+gfp_t gfp_mask, sg_alloc_fn *alloc_fn)
 {
struct scatterlist *sg, *prv;
unsigned int left;
@@ -269,7 +272,12 @@ int 

[PATCH 13/14] scsi: add support for a blk-mq based I/O path.

2014-06-12 Thread Christoph Hellwig
This patch adds support for an alternate I/O path in the scsi midlayer
which uses the blk-mq infrastructure instead of the legacy request code.

Use of blk-mq is fully transparent to drivers, although for now a host
template field is provided to opt out of blk-mq usage in case any unforeseen
incompatibilities arise.

In general replacing the legacy request code with blk-mq is a simple and
mostly mechanical transformation.  The biggest exception is the new code
that deals with the fact the I/O submissions in blk-mq must happen from
process context, which slightly complicates the I/O completion handler.
The second biggest difference is that blk-mq is built around the concept
of preallocated requests that also include driver specific data, which
in SCSI context means the scsi_cmnd structure.  This completely avoids
dynamic memory allocations for the fast path through I/O submission.

Due to the preallocated requests the MQ code path exclusively uses the
host-wide shared tag allocator instead of a per-LUN one.  This only
affects drivers actually using the block layer provided tag allocator
instead of their own.  Unlike the old path blk-mq always provides a tag,
although drivers don't have to use it.

For now the blk-mq path is disabled by default and must be enabled using
the use_blk_mq module parameter.  Once the remaining work in the block
layer to make blk-mq more suitable for slow devices is complete I hope
to make it the default and eventually even remove the old code path.

Based on the earlier scsi-mq prototype by Nicholas Bellinger.

Thanks to Bart Van Assche and Robert Elliot for testing, benchmarking and
various suggestions and code contributions.

Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/scsi/hosts.c  |   30 ++-
 drivers/scsi/scsi.c   |5 +-
 drivers/scsi/scsi_lib.c   |  460 +++--
 drivers/scsi/scsi_priv.h  |3 +
 drivers/scsi/scsi_scan.c  |5 +-
 drivers/scsi/scsi_sysfs.c |2 +
 include/scsi/scsi_host.h  |   18 +-
 include/scsi/scsi_tcq.h   |   28 ++-
 8 files changed, 481 insertions(+), 70 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 3cbb57a..0dd6874 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -213,9 +213,24 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct 
device *dev,
goto fail;
}
 
+   if (shost_use_blk_mq(shost)) {
+   error = scsi_mq_setup_tags(shost);
+   if (error)
+   goto fail;
+   }
+
+   /*
+* Note that we allocate the freelist even for the MQ case for now,
+* as we need a command set aside for scsi_reset_provider.  Having
+* the full host freelist and one command available for that is a
+* little heavy-handed, but avoids introducing a special allocator
+* just for this.  Eventually the structure of scsi_reset_provider
+* will need a major overhaul.
+*/
error = scsi_setup_command_freelist(shost);
if (error)
-   goto fail;
+   goto out_destroy_tags;
+
 
if (!shost-shost_gendev.parent)
shost-shost_gendev.parent = dev ? dev : platform_bus;
@@ -226,7 +241,7 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct 
device *dev,
 
error = device_add(shost-shost_gendev);
if (error)
-   goto out;
+   goto out_destroy_freelist;
 
pm_runtime_set_active(shost-shost_gendev);
pm_runtime_enable(shost-shost_gendev);
@@ -279,8 +294,11 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct 
device *dev,
device_del(shost-shost_dev);
  out_del_gendev:
device_del(shost-shost_gendev);
- out:
+ out_destroy_freelist:
scsi_destroy_command_freelist(shost);
+ out_destroy_tags:
+   if (shost_use_blk_mq(shost))
+   scsi_mq_destroy_tags(shost);
  fail:
return error;
 }
@@ -309,7 +327,9 @@ static void scsi_host_dev_release(struct device *dev)
}
 
scsi_destroy_command_freelist(shost);
-   if (shost-bqt)
+   if (shost_use_blk_mq(shost)  shost-tag_set.tags)
+   scsi_mq_destroy_tags(shost);
+   else if (shost-bqt)
blk_free_tags(shost-bqt);
 
kfree(shost-shost_data);
@@ -436,6 +456,8 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template 
*sht, int privsize)
else
shost-dma_boundary = 0x;
 
+   shost-use_blk_mq = scsi_use_blk_mq  !shost-hostt-disable_blk_mq;
+
device_initialize(shost-shost_gendev);
dev_set_name(shost-shost_gendev, host%d, shost-host_no);
shost-shost_gendev.bus = scsi_bus_type;
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index e30509a..cc55b74 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -810,7 +810,7 @@ void scsi_adjust_queue_depth(struct scsi_device *sdev, int 
tagged, int tags)
 * is more IO 

[PATCH 10/14] scsi: only maintain target_blocked if the driver has a target queue limit

2014-06-12 Thread Christoph Hellwig
This saves us an atomic operation for each I/O submission and completion
for the usual case where the driver doesn't set a per-target can_queue
value.  Only a few iscsi hardware offload drivers set the per-target
can_queue value at the moment.

Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/scsi/scsi_lib.c |   17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 0e33dee..763b3c9 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -295,7 +295,8 @@ void scsi_device_unbusy(struct scsi_device *sdev)
unsigned long flags;
 
atomic_dec(shost-host_busy);
-   atomic_dec(starget-target_busy);
+   if (starget-can_queue  0)
+   atomic_dec(starget-target_busy);
 
if (unlikely(scsi_host_in_recovery(shost) 
 (shost-host_failed || shost-host_eh_scheduled))) {
@@ -1335,6 +1336,9 @@ static inline int scsi_target_queue_ready(struct 
Scsi_Host *shost,
spin_unlock_irq(shost-host_lock);
}
 
+   if (starget-can_queue = 0)
+   return 1;
+
busy = atomic_inc_return(starget-target_busy) - 1;
if (busy == 0  atomic_read(starget-target_blocked)  0) {
if (atomic_dec_return(starget-target_blocked)  0)
@@ -1344,7 +1348,7 @@ static inline int scsi_target_queue_ready(struct 
Scsi_Host *shost,
 unblocking target at zero depth\n));
}
 
-   if (starget-can_queue  0  busy = starget-can_queue)
+   if (busy = starget-can_queue)
goto starved;
if (atomic_read(starget-target_blocked)  0)
goto starved;
@@ -1356,7 +1360,8 @@ starved:
list_move_tail(sdev-starved_entry, shost-starved_list);
spin_unlock_irq(shost-host_lock);
 out_dec:
-   atomic_dec(starget-target_busy);
+   if (starget-can_queue  0)
+   atomic_dec(starget-target_busy);
return 0;
 }
 
@@ -1473,7 +1478,8 @@ static void scsi_kill_request(struct request *req, struct 
request_queue *q)
 */
atomic_inc(sdev-device_busy);
atomic_inc(shost-host_busy);
-   atomic_inc(starget-target_busy);
+   if (starget-can_queue  0)
+   atomic_inc(starget-target_busy);
 
blk_complete_request(req);
 }
@@ -1642,7 +1648,8 @@ static void scsi_request_fn(struct request_queue *q)
return;
 
  host_not_ready:
-   atomic_dec(scsi_target(sdev)-target_busy);
+   if (scsi_target(sdev)-can_queue  0)
+   atomic_dec(scsi_target(sdev)-target_busy);
  not_ready:
/*
 * lock q, handle tag, requeue req, and decrement device_busy. We
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/14] scsi: convert device_busy to atomic_t

2014-06-12 Thread Christoph Hellwig
Avoid taking the queue_lock to check the per-device queue limit.  Instead
we do an atomic_inc_return early on to grab our slot in the queue,
and if necessary decrement it after finishing all checks.

Unlike the host and target busy counters this doesn't allow us to avoid the
queue_lock in the request_fn due to the way the interface works, but it'll
allow us to prepare for using the blk-mq code, which doesn't use the
queue_lock at all, and it at least avoids a queue_lock round trip in
scsi_device_unbusy, which is still important given how busy the queue_lock
is.

Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/message/fusion/mptsas.c |2 +-
 drivers/scsi/scsi_lib.c |   50 ++-
 drivers/scsi/scsi_sysfs.c   |   10 +++-
 drivers/scsi/sg.c   |2 +-
 include/scsi/scsi_device.h  |4 +---
 5 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 711fcb5..d636dbe 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -3763,7 +3763,7 @@ mptsas_send_link_status_event(struct fw_event_work 
*fw_event)
printk(MYIOC_s_DEBUG_FMT
SDEV OUTSTANDING CMDS
%d\n, ioc-name,
-   sdev-device_busy));
+   
atomic_read(sdev-device_busy)));
}
 
}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 3f51bb8..c36c313 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -302,9 +302,7 @@ void scsi_device_unbusy(struct scsi_device *sdev)
spin_unlock_irqrestore(shost-host_lock, flags);
}
 
-   spin_lock_irqsave(sdev-request_queue-queue_lock, flags);
-   sdev-device_busy--;
-   spin_unlock_irqrestore(sdev-request_queue-queue_lock, flags);
+   atomic_dec(sdev-device_busy);
 }
 
 /*
@@ -355,9 +353,10 @@ static void scsi_single_lun_run(struct scsi_device 
*current_sdev)
 
 static inline int scsi_device_is_busy(struct scsi_device *sdev)
 {
-   if (sdev-device_busy = sdev-queue_depth || sdev-device_blocked)
+   if (atomic_read(sdev-device_busy) = sdev-queue_depth)
+   return 1;
+   if (sdev-device_blocked)
return 1;
-
return 0;
 }
 
@@ -1224,7 +1223,7 @@ scsi_prep_return(struct request_queue *q, struct request 
*req, int ret)
 * queue must be restarted, so we schedule a callback to happen
 * shortly.
 */
-   if (sdev-device_busy == 0)
+   if (atomic_read(sdev-device_busy) == 0)
blk_delay_queue(q, SCSI_QUEUE_DELAY);
break;
default:
@@ -1281,26 +1280,32 @@ static void scsi_unprep_fn(struct request_queue *q, 
struct request *req)
 static inline int scsi_dev_queue_ready(struct request_queue *q,
  struct scsi_device *sdev)
 {
-   if (sdev-device_busy == 0  sdev-device_blocked) {
+   unsigned int busy;
+
+   busy = atomic_inc_return(sdev-device_busy) - 1;
+   if (busy == 0  sdev-device_blocked) {
/*
 * unblock after device_blocked iterates to zero
 */
-   if (--sdev-device_blocked == 0) {
-   SCSI_LOG_MLQUEUE(3,
-  sdev_printk(KERN_INFO, sdev,
-  unblocking device at zero depth\n));
-   } else {
+   if (--sdev-device_blocked != 0) {
blk_delay_queue(q, SCSI_QUEUE_DELAY);
-   return 0;
+   goto out_dec;
}
+   SCSI_LOG_MLQUEUE(3, sdev_printk(KERN_INFO, sdev,
+  unblocking device at zero depth\n));
}
-   if (scsi_device_is_busy(sdev))
-   return 0;
+
+   if (busy = sdev-queue_depth)
+   goto out_dec;
+   if (sdev-device_blocked)
+   goto out_dec;
 
return 1;
+out_dec:
+   atomic_dec(sdev-device_busy);
+   return 0;
 }
 
-
 /*
  * scsi_target_queue_ready: checks if there we can send commands to target
  * @sdev: scsi device on starget to check.
@@ -1470,7 +1475,7 @@ static void scsi_kill_request(struct request *req, struct 
request_queue *q)
 * bump busy counts.  To bump the counters, we need to dance
 * with the locks as normal issue path does.
 */
-   sdev-device_busy++;
+   atomic_inc(sdev-device_busy);
atomic_inc(shost-host_busy);
atomic_inc(starget-target_busy);
 
@@ -1566,7 +1571,7 @@ static void scsi_request_fn(struct request_queue *q)
 * accept it.
 */
 

[PATCH 07/14] scsi: convert host_busy to atomic_t

2014-06-12 Thread Christoph Hellwig
Avoid taking the host-wide host_lock to check the per-host queue limit.
Instead we do an atomic_inc_return early on to grab our slot in the queue,
and if necessary decrement it after finishing all checks.

Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/scsi/advansys.c |4 +-
 drivers/scsi/libiscsi.c |4 +-
 drivers/scsi/libsas/sas_scsi_host.c |5 ++-
 drivers/scsi/qlogicpti.c|2 +-
 drivers/scsi/scsi.c |2 +-
 drivers/scsi/scsi_error.c   |6 +--
 drivers/scsi/scsi_lib.c |   71 +--
 drivers/scsi/scsi_sysfs.c   |9 -
 include/scsi/scsi_host.h|   10 ++---
 9 files changed, 65 insertions(+), 48 deletions(-)

diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c
index d814588..0a6ecbd 100644
--- a/drivers/scsi/advansys.c
+++ b/drivers/scsi/advansys.c
@@ -2512,7 +2512,7 @@ static void asc_prt_scsi_host(struct Scsi_Host *s)
 
printk(Scsi_Host at addr 0x%p, device %s\n, s, dev_name(boardp-dev));
printk( host_busy %u, host_no %d,\n,
-  s-host_busy, s-host_no);
+  atomic_read(s-host_busy), s-host_no);
 
printk( base 0x%lx, io_port 0x%lx, irq %d,\n,
   (ulong)s-base, (ulong)s-io_port, boardp-irq);
@@ -3346,7 +3346,7 @@ static void asc_prt_driver_conf(struct seq_file *m, 
struct Scsi_Host *shost)
 
seq_printf(m,
host_busy %u, max_id %u, max_lun %u, max_channel %u\n,
-  shost-host_busy, shost-max_id,
+  atomic_read(shost-host_busy), shost-max_id,
   shost-max_lun, shost-max_channel);
 
seq_printf(m,
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index ecd7bd3..f4e9215 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -2971,7 +2971,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
 */
for (;;) {
spin_lock_irqsave(session-host-host_lock, flags);
-   if (!session-host-host_busy) { /* OK for ERL == 0 */
+   if (!atomic_read(session-host-host_busy)) { /* OK for ERL == 
0 */
spin_unlock_irqrestore(session-host-host_lock, flags);
break;
}
@@ -2979,7 +2979,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
msleep_interruptible(500);
iscsi_conn_printk(KERN_INFO, conn, iscsi conn_destroy(): 
  host_busy %d host_failed %d\n,
- session-host-host_busy,
+ atomic_read(session-host-host_busy),
  session-host-host_failed);
/*
 * force eh_abort() to unblock
diff --git a/drivers/scsi/libsas/sas_scsi_host.c 
b/drivers/scsi/libsas/sas_scsi_host.c
index 25d0f12..eec31b0 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -812,7 +812,7 @@ retry:
spin_unlock_irq(shost-host_lock);
 
SAS_DPRINTK(Enter %s busy: %d failed: %d\n,
-   __func__, shost-host_busy, shost-host_failed);
+   __func__, atomic_read(shost-host_busy), 
shost-host_failed);
/*
 * Deal with commands that still have SAS tasks (i.e. they didn't
 * complete via the normal sas_task completion mechanism),
@@ -857,7 +857,8 @@ out:
goto retry;
 
SAS_DPRINTK(--- Exit %s: busy: %d failed: %d tries: %d\n,
-   __func__, shost-host_busy, shost-host_failed, tries);
+   __func__, atomic_read(shost-host_busy),
+   shost-host_failed, tries);
 }
 
 enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c
index 6d48d30..740ae49 100644
--- a/drivers/scsi/qlogicpti.c
+++ b/drivers/scsi/qlogicpti.c
@@ -959,7 +959,7 @@ static inline void update_can_queue(struct Scsi_Host *host, 
u_int in_ptr, u_int
/* Temporary workaround until bug is found and fixed (one bug has been 
found
   already, but fixing it makes things even worse) -jj */
int num_free = QLOGICPTI_REQ_QUEUE_LEN - REQ_QUEUE_DEPTH(in_ptr, 
out_ptr) - 64;
-   host-can_queue = host-host_busy + num_free;
+   host-can_queue = atomic_read(host-host_busy) + num_free;
host-sg_tablesize = QLOGICPTI_MAX_SG(num_free);
 }
 
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 8ca9ed2..091329a 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -603,7 +603,7 @@ void scsi_log_completion(struct scsi_cmnd *cmd, int 
disposition)
if (level  3)
scmd_printk(KERN_INFO, cmd,
scsi host busy %d failed %d\n,
-   cmd-device-host-host_busy,
+   

[PATCH 09/14] scsi: fix the {host,target,device}_blocked counter mess

2014-06-12 Thread Christoph Hellwig
Seems like these counters are missing any sort of synchronization for
updates, as an over-10-year-old comment from me noted.  Fix this by
using atomic counters, and while we're at it also make sure they are
in the same cacheline as the _busy counters and not needlessly stored
to in every I/O completion.

With the new model the _blocked counters can temporarily go negative,
so all the readers are updated to check for > 0 values.  Longer
term every successful I/O completion will reset the counters to zero,
so the temporarily negative values will not cause any harm.

Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/scsi/scsi.c|   21 ++--
 drivers/scsi/scsi_lib.c|   82 +---
 drivers/scsi/scsi_sysfs.c  |   10 +-
 include/scsi/scsi_device.h |7 ++--
 include/scsi/scsi_host.h   |7 ++--
 5 files changed, 64 insertions(+), 63 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 091329a..e30509a 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -730,17 +730,16 @@ void scsi_finish_command(struct scsi_cmnd *cmd)
 
scsi_device_unbusy(sdev);
 
-/*
- * Clear the flags which say that the device/host is no longer
- * capable of accepting new commands.  These are set in scsi_queue.c
- * for both the queue full condition on a device, and for a
- * host full condition on the host.
-*
-* XXX(hch): What about locking?
- */
-shost-host_blocked = 0;
-   starget-target_blocked = 0;
-sdev-device_blocked = 0;
+   /*
+* Clear the flags which say that the device/target/host is no longer
+* capable of accepting new commands.
+*/
+   if (atomic_read(shost-host_blocked))
+   atomic_set(shost-host_blocked, 0);
+   if (atomic_read(starget-target_blocked))
+   atomic_set(starget-target_blocked, 0);
+   if (atomic_read(sdev-device_blocked))
+   atomic_set(sdev-device_blocked, 0);
 
/*
 * If we have valid sense information, then some kind of recovery
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index c36c313..0e33dee 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -99,14 +99,16 @@ scsi_set_blocked(struct scsi_cmnd *cmd, int reason)
 */
switch (reason) {
case SCSI_MLQUEUE_HOST_BUSY:
-   host-host_blocked = host-max_host_blocked;
+   atomic_set(host-host_blocked, host-max_host_blocked);
break;
case SCSI_MLQUEUE_DEVICE_BUSY:
case SCSI_MLQUEUE_EH_RETRY:
-   device-device_blocked = device-max_device_blocked;
+   atomic_set(device-device_blocked,
+  device-max_device_blocked);
break;
case SCSI_MLQUEUE_TARGET_BUSY:
-   starget-target_blocked = starget-max_target_blocked;
+   atomic_set(starget-target_blocked,
+  starget-max_target_blocked);
break;
}
 }
@@ -351,30 +353,39 @@ static void scsi_single_lun_run(struct scsi_device 
*current_sdev)
spin_unlock_irqrestore(shost-host_lock, flags);
 }
 
-static inline int scsi_device_is_busy(struct scsi_device *sdev)
+static inline bool scsi_device_is_busy(struct scsi_device *sdev)
 {
if (atomic_read(sdev-device_busy) = sdev-queue_depth)
-   return 1;
-   if (sdev-device_blocked)
-   return 1;
+   return true;
+   if (atomic_read(sdev-device_blocked)  0)
+   return true;
return 0;
 }
 
-static inline int scsi_target_is_busy(struct scsi_target *starget)
+static inline bool scsi_target_is_busy(struct scsi_target *starget)
 {
-   return ((starget-can_queue  0 
-atomic_read(starget-target_busy) = starget-can_queue) ||
-starget-target_blocked);
+   if (starget-can_queue  0) {
+   if (atomic_read(starget-target_busy) = starget-can_queue)
+   return true;
+   if (atomic_read(starget-target_blocked)  0)
+   return true;
+   }
+
+   return false;
 }
 
-static inline int scsi_host_is_busy(struct Scsi_Host *shost)
+static inline bool scsi_host_is_busy(struct Scsi_Host *shost)
 {
-   if ((shost-can_queue  0 
-atomic_read(shost-host_busy) = shost-can_queue) ||
-   shost-host_blocked || shost-host_self_blocked)
-   return 1;
+   if (shost-can_queue  0) {
+   if (atomic_read(shost-host_busy) = shost-can_queue)
+   return true;
+   if (atomic_read(shost-host_blocked)  0)
+   return true;
+   if (shost-host_self_blocked)
+   return true;
+   }
 
-   return 0;
+   return false;
 }
 
 static void scsi_starved_list_run(struct Scsi_Host *shost)
@@ 

[PATCH 11/14] scsi: unwind blk_end_request_all and blk_end_request_err calls

2014-06-12 Thread Christoph Hellwig
Replace the calls to the various blk_end_request variants with open-coded
equivalents.  Blk-mq is using a model that gives the driver control
between the bio updates and the actual completion, and making the old
code follow that same model allows us to keep the code more similar for
both paths.

Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/scsi/scsi_lib.c |   61 ---
 1 file changed, 42 insertions(+), 19 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 763b3c9..e438726 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -625,6 +625,37 @@ static void scsi_release_bidi_buffers(struct scsi_cmnd 
*cmd)
cmd-request-next_rq-special = NULL;
 }
 
+static bool scsi_end_request(struct request *req, int error,
+   unsigned int bytes, unsigned int bidi_bytes)
+{
+   struct scsi_cmnd *cmd = req-special;
+   struct scsi_device *sdev = cmd-device;
+   struct request_queue *q = sdev-request_queue;
+   unsigned long flags;
+
+
+   if (blk_update_request(req, error, bytes))
+   return true;
+
+   /* Bidi request must be completed as a whole */
+   if (unlikely(bidi_bytes) 
+   blk_update_request(req-next_rq, error, bidi_bytes))
+   return true;
+
+   if (blk_queue_add_random(q))
+   add_disk_randomness(req-rq_disk);
+
+   spin_lock_irqsave(q-queue_lock, flags);
+   blk_finish_request(req, error);
+   spin_unlock_irqrestore(q-queue_lock, flags);
+
+   if (bidi_bytes)
+   scsi_release_bidi_buffers(cmd);
+   scsi_release_buffers(cmd);
+   scsi_next_command(cmd);
+   return false;
+}
+
 /**
  * __scsi_error_from_host_byte - translate SCSI error code into errno
  * @cmd:   SCSI command (unused)
@@ -697,7 +728,7 @@ static int __scsi_error_from_host_byte(struct scsi_cmnd 
*cmd, int result)
  *be put back on the queue and retried using the same
  *command as before, possibly after a delay.
  *
- * c) We can call blk_end_request() with -EIO to fail
+ * c) We can call scsi_end_request() with -EIO to fail
  *the remainder of the request.
  */
 void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
@@ -749,13 +780,9 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned 
int good_bytes)
 * both sides at once.
 */
req-next_rq-resid_len = scsi_in(cmd)-resid;
-
-   scsi_release_buffers(cmd);
-   scsi_release_bidi_buffers(cmd);
-
-   blk_end_request_all(req, 0);
-
-   scsi_next_command(cmd);
+   if (scsi_end_request(req, 0, blk_rq_bytes(req),
+   blk_rq_bytes(req-next_rq)))
+   BUG();
return;
}
}
@@ -794,15 +821,16 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned 
int good_bytes)
/*
 * If we finished all bytes in the request we are done now.
 */
-   if (!blk_end_request(req, error, good_bytes))
-   goto next_command;
+   if (!scsi_end_request(req, error, good_bytes, 0))
+   return;
 
/*
 * Kill remainder if no retrys.
 */
if (error  scsi_noretry_cmd(cmd)) {
-   blk_end_request_all(req, error);
-   goto next_command;
+   if (scsi_end_request(req, error, blk_rq_bytes(req), 0))
+   BUG();
+   return;
}
 
/*
@@ -947,8 +975,8 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int 
good_bytes)
scsi_print_sense(, cmd);
scsi_print_command(cmd);
}
-   if (!blk_end_request_err(req, error))
-   goto next_command;
+   if (!scsi_end_request(req, error, blk_rq_err_bytes(req), 0))
+   return;
/*FALLTHRU*/
case ACTION_REPREP:
requeue:
@@ -967,11 +995,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned 
int good_bytes)
__scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY, 0);
break;
}
-   return;
-
-next_command:
-   scsi_release_buffers(cmd);
-   scsi_next_command(cmd);
 }
 
 static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb,
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/14] scsi: convert target_busy to an atomic_t

2014-06-12 Thread Christoph Hellwig
Avoid taking the host-wide host_lock to check the per-target queue limit.
Instead we do an atomic_inc_return early on to grab our slot in the queue,
and if necessary decrement it after finishing all checks.

Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/scsi/scsi_lib.c|   52 ++--
 include/scsi/scsi_device.h |4 ++--
 2 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 3d90340..9e288e6 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -294,7 +294,7 @@ void scsi_device_unbusy(struct scsi_device *sdev)
 
spin_lock_irqsave(shost-host_lock, flags);
shost-host_busy--;
-   starget-target_busy--;
+   atomic_dec(starget-target_busy);
if (unlikely(scsi_host_in_recovery(shost) 
 (shost-host_failed || shost-host_eh_scheduled)))
scsi_eh_wakeup(shost);
@@ -361,7 +361,7 @@ static inline int scsi_device_is_busy(struct scsi_device 
*sdev)
 static inline int scsi_target_is_busy(struct scsi_target *starget)
 {
return ((starget-can_queue  0 
-starget-target_busy = starget-can_queue) ||
+atomic_read(starget-target_busy) = starget-can_queue) ||
 starget-target_blocked);
 }
 
@@ -1305,37 +1305,49 @@ static inline int scsi_target_queue_ready(struct 
Scsi_Host *shost,
   struct scsi_device *sdev)
 {
struct scsi_target *starget = scsi_target(sdev);
-   int ret = 0;
+   unsigned int busy;
 
-   spin_lock_irq(shost-host_lock);
if (starget-single_lun) {
+   spin_lock_irq(shost-host_lock);
if (starget-starget_sdev_user 
-   starget-starget_sdev_user != sdev)
-   goto out;
+   starget-starget_sdev_user != sdev) {
+   spin_unlock_irq(shost-host_lock);
+   return 0;
+   }
starget-starget_sdev_user = sdev;
+   spin_unlock_irq(shost-host_lock);
}
 
-   if (starget-target_busy == 0  starget-target_blocked) {
+   busy = atomic_inc_return(starget-target_busy) - 1;
+   if (busy == 0  starget-target_blocked) {
/*
 * unblock after target_blocked iterates to zero
 */
-   if (--starget-target_blocked != 0)
-   goto out;
+   spin_lock_irq(shost-host_lock);
+   if (--starget-target_blocked != 0) {
+   spin_unlock_irq(shost-host_lock);
+   goto out_dec;
+   }
+   spin_unlock_irq(shost-host_lock);
 
SCSI_LOG_MLQUEUE(3, starget_printk(KERN_INFO, starget,
 unblocking target at zero depth\n));
}
 
-   if (scsi_target_is_busy(starget)) {
-   list_move_tail(sdev-starved_entry, shost-starved_list);
-   goto out;
-   }
+   if (starget-can_queue  0  busy = starget-can_queue)
+   goto starved;
+   if (starget-target_blocked)
+   goto starved;
 
-   scsi_target(sdev)-target_busy++;
-   ret = 1;
-out:
+   return 1;
+
+starved:
+   spin_lock_irq(shost-host_lock);
+   list_move_tail(sdev-starved_entry, shost-starved_list);
spin_unlock_irq(shost-host_lock);
-   return ret;
+out_dec:
+   atomic_dec(starget-target_busy);
+   return 0;
 }
 
 /*
@@ -1445,7 +1457,7 @@ static void scsi_kill_request(struct request *req, struct 
request_queue *q)
spin_unlock(sdev-request_queue-queue_lock);
spin_lock(shost-host_lock);
shost-host_busy++;
-   starget-target_busy++;
+   atomic_inc(starget-target_busy);
spin_unlock(shost-host_lock);
spin_lock(sdev-request_queue-queue_lock);
 
@@ -1615,9 +1627,7 @@ static void scsi_request_fn(struct request_queue *q)
return;
 
  host_not_ready:
-   spin_lock_irq(shost-host_lock);
-   scsi_target(sdev)-target_busy--;
-   spin_unlock_irq(shost-host_lock);
+   atomic_dec(scsi_target(sdev)-target_busy);
  not_ready:
/*
 * lock q, handle tag, requeue req, and decrement device_busy. We
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 5853c91..560847b 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -290,8 +290,8 @@ struct scsi_target {
unsigned intexpecting_lun_change:1; /* A device has reported
 * a 3F/0E UA, other devices on
 * the same target will also. */
-   /* commands actually active on LLD. protected by host lock. */
-   unsigned inttarget_busy;
+   /* commands actually active on LLD. */
+   atomic_ttarget_busy;
 

[PATCH 04/14] scsi: set -scsi_done before calling scsi_dispatch_cmd

2014-06-12 Thread Christoph Hellwig
The blk-mq code path will set this to a different function, so make the
code simpler by setting it up in a legacy-request specific place.

Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/scsi/scsi.c |   23 +--
 drivers/scsi/scsi_lib.c |   20 
 2 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index b91abab..8ca9ed2 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -72,8 +72,6 @@
 #define CREATE_TRACE_POINTS
 #include trace/events/scsi.h
 
-static void scsi_done(struct scsi_cmnd *cmd);
-
 /*
  * Definitions and constants.
  */
@@ -695,8 +693,6 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
}
 
trace_scsi_dispatch_cmd_start(cmd);
-
-   cmd-scsi_done = scsi_done;
rtn = host-hostt-queuecommand(host, cmd);
if (rtn) {
trace_scsi_dispatch_cmd_error(cmd, rtn);
@@ -712,28 +708,11 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
return rtn;
  done:
SCSI_LOG_MLQUEUE(3, printk(scsi_dispatch_cmnd() failed\n));
-   scsi_done(cmd);
+   cmd-scsi_done(cmd);
return 0;
 }
 
 /**
- * scsi_done - Invoke completion on finished SCSI command.
- * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives
- * ownership back to SCSI Core -- i.e. the LLDD has finished with it.
- *
- * Description: This function is the mid-level's (SCSI Core) interrupt routine,
- * which regains ownership of the SCSI command (de facto) from a LLDD, and
- * calls blk_complete_request() for further processing.
- *
- * This function is interrupt context safe.
- */
-static void scsi_done(struct scsi_cmnd *cmd)
-{
-   trace_scsi_dispatch_cmd_done(cmd);
-   blk_complete_request(cmd-request);
-}
-
-/**
  * scsi_finish_command - cleanup and pass command back to upper layer
  * @cmd: the command
  *
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 98eb358..f0ec249 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -29,6 +29,8 @@
 #include scsi/scsi_eh.h
 #include scsi/scsi_host.h
 
+#include trace/events/scsi.h
+
 #include scsi_priv.h
 #include scsi_logging.h
 
@@ -1480,6 +1482,23 @@ static void scsi_softirq_done(struct request *rq)
}
 }
 
+/**
+ * scsi_done - Invoke completion on finished SCSI command.
+ * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives
+ * ownership back to SCSI Core -- i.e. the LLDD has finished with it.
+ *
+ * Description: This function is the mid-level's (SCSI Core) interrupt routine,
+ * which regains ownership of the SCSI command (de facto) from a LLDD, and
+ * calls blk_complete_request() for further processing.
+ *
+ * This function is interrupt context safe.
+ */
+static void scsi_done(struct scsi_cmnd *cmd)
+{
+   trace_scsi_dispatch_cmd_done(cmd);
+   blk_complete_request(cmd-request);
+}
+
 /*
  * Function:scsi_request_fn()
  *
@@ -1582,6 +1601,7 @@ static void scsi_request_fn(struct request_queue *q)
/*
 * Dispatch the command to the low-level driver.
 */
+   cmd-scsi_done = scsi_done;
rtn = scsi_dispatch_cmd(cmd);
if (rtn) {
scsi_queue_insert(cmd, rtn);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/14] sd: don't use rq-cmd_len before setting it up

2014-06-12 Thread Christoph Hellwig
Unlike the old request code blk-mq doesn't initialize cmd_len with a
default value, so don't rely on it being set in sd_setup_write_same_cmnd.

Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/scsi/sd.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index e9689d5..dbd0c51 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -810,15 +810,16 @@ static int sd_setup_write_same_cmnd(struct scsi_device 
*sdp, struct request *rq)
 
rq-__data_len = sdp-sector_size;
rq-timeout = SD_WRITE_SAME_TIMEOUT;
-   memset(rq-cmd, 0, rq-cmd_len);
 
if (sdkp-ws16 || sector  0x || nr_sectors  0x) {
rq-cmd_len = 16;
+   memset(rq-cmd, 0, rq-cmd_len);
rq-cmd[0] = WRITE_SAME_16;
put_unaligned_be64(sector, rq-cmd[2]);
put_unaligned_be32(nr_sectors, rq-cmd[10]);
} else {
rq-cmd_len = 10;
+   memset(rq-cmd, 0, rq-cmd_len);
rq-cmd[0] = WRITE_SAME;
put_unaligned_be32(sector, rq-cmd[2]);
put_unaligned_be16(nr_sectors, rq-cmd[7]);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/14] scsi: centralize command re-queueing in scsi_dispatch_fn

2014-06-12 Thread Christoph Hellwig
Make sure we only have the logic for requeuing commands in one place.

Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/scsi/scsi.c |   36 +---
 drivers/scsi/scsi_lib.c |9 ++---
 2 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 88d46fe..b91abab 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -648,9 +648,7 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
 * returns an immediate error upwards, and signals
 * that the device is no longer present */
cmd-result = DID_NO_CONNECT  16;
-   scsi_done(cmd);
-   /* return 0 (because the command has been processed) */
-   goto out;
+   goto done;
}
 
/* Check to see if the scsi lld made this device blocked. */
@@ -662,16 +660,8 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
 * occur until the device transitions out of the
 * suspend state.
 */
-
-   scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
-
SCSI_LOG_MLQUEUE(3, printk(queuecommand : device blocked \n));
-
-   /*
-* NOTE: rtn is still zero here because we don't need the
-* queue to be plugged on return (it's already stopped)
-*/
-   goto out;
+   return SCSI_MLQUEUE_DEVICE_BUSY;
}
 
/* 
@@ -695,35 +685,35 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
   cdb_size=%d host-max_cmd_len=%d\n,
   cmd-cmd_len, cmd-device-host-max_cmd_len));
cmd-result = (DID_ABORT  16);
-
-   scsi_done(cmd);
-   goto out;
+   goto done;
}
 
if (unlikely(host-shost_state == SHOST_DEL)) {
cmd-result = (DID_NO_CONNECT  16);
-   scsi_done(cmd);
-   } else {
-   trace_scsi_dispatch_cmd_start(cmd);
-   cmd-scsi_done = scsi_done;
-   rtn = host-hostt-queuecommand(host, cmd);
+   goto done;
+
}
 
+   trace_scsi_dispatch_cmd_start(cmd);
+
+   cmd-scsi_done = scsi_done;
+   rtn = host-hostt-queuecommand(host, cmd);
if (rtn) {
trace_scsi_dispatch_cmd_error(cmd, rtn);
if (rtn != SCSI_MLQUEUE_DEVICE_BUSY 
rtn != SCSI_MLQUEUE_TARGET_BUSY)
rtn = SCSI_MLQUEUE_HOST_BUSY;
 
-   scsi_queue_insert(cmd, rtn);
-
SCSI_LOG_MLQUEUE(3,
printk(queuecommand : request rejected\n));
}
 
- out:
SCSI_LOG_MLQUEUE(3, printk(leaving scsi_dispatch_cmnd()\n));
return rtn;
+ done:
+   SCSI_LOG_MLQUEUE(3, printk(scsi_dispatch_cmnd() failed\n));
+   scsi_done(cmd);
+   return 0;
 }
 
 /**
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7662168..98eb358 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1583,9 +1583,12 @@ static void scsi_request_fn(struct request_queue *q)
 * Dispatch the command to the low-level driver.
 */
rtn = scsi_dispatch_cmd(cmd);
-   spin_lock_irq(q-queue_lock);
-   if (rtn)
+   if (rtn) {
+   scsi_queue_insert(cmd, rtn);
+   spin_lock_irq(q-queue_lock);
goto out_delay;
+   }
+   spin_lock_irq(q-queue_lock);
}
 
return;
@@ -1605,7 +1608,7 @@ static void scsi_request_fn(struct request_queue *q)
blk_requeue_request(q, req);
sdev-device_busy--;
 out_delay:
-   if (sdev-device_busy == 0)
+   if (sdev-device_busy == 0  !scsi_device_blocked(sdev))
blk_delay_queue(q, SCSI_QUEUE_DELAY);
 }
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/14] scsi: split __scsi_queue_insert

2014-06-12 Thread Christoph Hellwig
Factor out a helper to set the _blocked values, which we'll reuse for the
blk-mq code path.

Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/scsi/scsi_lib.c |   44 ++--
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index f7e3163..7662168 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -75,28 +75,12 @@ struct kmem_cache *scsi_sdb_cache;
  */
 #define SCSI_QUEUE_DELAY   3
 
-/**
- * __scsi_queue_insert - private queue insertion
- * @cmd: The SCSI command being requeued
- * @reason:  The reason for the requeue
- * @unbusy: Whether the queue should be unbusied
- *
- * This is a private queue insertion.  The public interface
- * scsi_queue_insert() always assumes the queue should be unbusied
- * because it's always called before the completion.  This function is
- * for a requeue after completion, which should only occur in this
- * file.
- */
-static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy)
+static void
+scsi_set_blocked(struct scsi_cmnd *cmd, int reason)
 {
struct Scsi_Host *host = cmd-device-host;
struct scsi_device *device = cmd-device;
struct scsi_target *starget = scsi_target(device);
-   struct request_queue *q = device-request_queue;
-   unsigned long flags;
-
-   SCSI_LOG_MLQUEUE(1,
-printk(Inserting command %p into mlqueue\n, cmd));
 
/*
 * Set the appropriate busy bit for the device/host.
@@ -123,6 +107,30 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int 
reason, int unbusy)
starget-target_blocked = starget-max_target_blocked;
break;
}
+}
+
+/**
+ * __scsi_queue_insert - private queue insertion
+ * @cmd: The SCSI command being requeued
+ * @reason:  The reason for the requeue
+ * @unbusy: Whether the queue should be unbusied
+ *
+ * This is a private queue insertion.  The public interface
+ * scsi_queue_insert() always assumes the queue should be unbusied
+ * because it's always called before the completion.  This function is
+ * for a requeue after completion, which should only occur in this
+ * file.
+ */
+static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy)
+{
+   struct scsi_device *device = cmd-device;
+   struct request_queue *q = device-request_queue;
+   unsigned long flags;
+
+   SCSI_LOG_MLQUEUE(1,
+printk(Inserting command %p into mlqueue\n, cmd));
+
+   scsi_set_blocked(cmd, reason);
 
/*
 * Decrement the counters, since these commands are no longer
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] hpsa: refine the pci enable/disable handling

2014-06-12 Thread Tomas Henzl
When a second (kdump) kernel starts and the hard reset method is used,
the driver calls pci_disable_device without previously enabling it,
so the kernel shows a warning -
[   16.876248] WARNING: at drivers/pci/pci.c:1431 pci_disable_device+0x84/0x90()
[   16.882686] Device hpsa
disabling already-disabled device
...
This patch fixes it. In addition, I also tried to balance some other
pairs of enable/disable device calls in the driver.
Unfortunately I wasn't able to verify the functionality for the case of a
software reset, because I lack the proper hardware.

Signed-off-by: Tomas Henzl the...@redhat.com
---
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 5858600..67c41b9 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -5983,7 +5983,6 @@ static int hpsa_kdump_hard_reset_controller(struct 
pci_dev *pdev)
/* Turn the board off.  This is so that later pci_restore_state()
 * won't turn the board on before the rest of config space is ready.
 */
-   pci_disable_device(pdev);
pci_save_state(pdev);
 
/* find the first memory BAR, so we can find the cfg table */
@@ -6031,11 +6030,6 @@ static int hpsa_kdump_hard_reset_controller(struct 
pci_dev *pdev)
goto unmap_cfgtable;
 
pci_restore_state(pdev);
-   rc = pci_enable_device(pdev);
-   if (rc) {
-   dev_warn(pdev-dev, failed to enable device.\n);
-   goto unmap_cfgtable;
-   }
pci_write_config_word(pdev, 4, command_register);
 
/* Some devices (notably the HP Smart Array 5i Controller)
@@ -6548,6 +6542,12 @@ static int hpsa_init_reset_devices(struct pci_dev *pdev)
if (!reset_devices)
return 0;
 
+   rc = pci_enable_device(pdev);
+   if (rc) {
+   dev_warn(pdev-dev, failed to enable device.\n);
+   return -ENODEV;
+   }
+
/* Reset the controller with a PCI power-cycle or via doorbell */
rc = hpsa_kdump_hard_reset_controller(pdev);
 
@@ -6556,10 +6556,11 @@ static int hpsa_init_reset_devices(struct pci_dev *pdev)
 * performant mode.  Or, it might be 640x, which can't reset
 * due to concerns about shared bbwc between 6402/6404 pair.
 */
-   if (rc == -ENOTSUPP)
-   return rc; /* just try to do the kdump anyhow. */
-   if (rc)
-   return -ENODEV;
+   if (rc) {
+   if (rc != -ENOTSUPP) /* just try to do the kdump anyhow. */
+   rc = -ENODEV;
+   goto out_disable; 
+   }
 
/* Now try to get the controller to respond to a no-op */
dev_warn(pdev-dev, Waiting for controller to respond to no-op\n);
@@ -6570,7 +6571,11 @@ static int hpsa_init_reset_devices(struct pci_dev *pdev)
dev_warn(pdev-dev, no-op failed%s\n,
(i  11 ? ; re-trying : ));
}
-   return 0;
+
+out_disable:
+
+   pci_disable_device(pdev);
+   return rc;
 }
 
 static int hpsa_allocate_cmd_pool(struct ctlr_info *h)
@@ -6722,6 +6727,7 @@ static void 
hpsa_undo_allocations_after_kdump_soft_reset(struct ctlr_info *h)
iounmap(h-transtable);
if (h-cfgtable)
iounmap(h-cfgtable);
+   pci_disable_device(h-pdev);
pci_release_regions(h-pdev);
kfree(h);
 }
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


All devices on host blocked

2014-06-12 Thread Michael Robbert
I have a large JBOD attached to my server via an LSI SAS2308 PCI card (mpt2sas 
driver). I've got about 40 drives right now assembled into 4 Linux software 
RAID sets and I am using those RAID volumes as back end devices for GPFS. 
Everything was working fine about a week ago when I had 20 drives and 2 RAID 
volumes then I added 20 new disks, all the same model, and now I am frequently 
seeing all the devices behind the SAS card reporting device_blocked immediately 
followed by device_unblocked. These events are correlated with a period of many 
seconds of no data throughput. This is happening often enough to cause major 
throughput problems. I have seen similar problems in the past, but they were 
accompanied by some kind of disk specific error and I could fix the situation 
by removing the disk. In this case there are no other errors in any log besides 
the device_blocked and device_unblocked on every single device.
This system is not in production yet so I can blow it all away if I need to, 
but I really want to understand what is causing this so that if it does come 
back once we go into production I'll be able to fix it without major 
disruptions. I suspect there is a misbehaving drive, but there is nothing 
pointing to a single drive and I could be completely wrong about that. Does 
anybody have any clue where to look?

Here is what the error logs look like:

Jun 11 19:29:17 storage003 kernel: sd 6:0:0:0: device_blocked, handle(0x0016)
Jun 11 19:29:17 storage003 kernel: sd 6:0:1:0: device_blocked, handle(0x000b)
Jun 11 19:29:17 storage003 kernel: sd 6:0:2:0: device_blocked, handle(0x000c)
Jun 11 19:29:17 storage003 kernel: ses 6:0:3:0: device_blocked, handle(0x000e)
Jun 11 19:29:17 storage003 kernel: sd 6:0:4:0: device_blocked, handle(0x000f)
Jun 11 19:29:17 storage003 kernel: sd 6:0:5:0: device_blocked, handle(0x0010)
... Same thing for the rest of the devices on host6
Jun 11 19:29:18 storage003 kernel: sd 6:0:0:0: device_unblocked and set to 
running, handle(0x0016)
Jun 11 19:29:18 storage003 kernel: sd 6:0:1:0: device_unblocked and set to 
running, handle(0x000b)
Jun 11 19:29:18 storage003 kernel: sd 6:0:2:0: device_unblocked and set to 
running, handle(0x000c)
Jun 11 19:29:18 storage003 kernel: ses 6:0:3:0: device_unblocked and set to 
running, handle(0x000e)
Jun 11 19:29:18 storage003 kernel: sd 6:0:4:0: device_unblocked and set to 
running, handle(0x000f)
Jun 11 19:29:18 storage003 kernel: sd 6:0:5:0: device_unblocked and set to 
running, handle(0x0010)
... Same thing for the rest of the devices again.

Thanks,
Mike Robbert

smime.p7s
Description: S/MIME cryptographic signature


Re: [PATCH] bnx2fc: Do not log error for netevents that need no action

2014-06-12 Thread Chad Dupuis


On Tue, 10 Jun 2014, Eddie Wai wrote:


On Fri, 2014-06-06 at 13:05 -0500, shirishpargaon...@gmail.com wrote:

From: Shirish Pargaonkar shirishpargaon...@gmail.com


Do not log error for netevents that need no action such as
NETDEV_REGISTER 0x0005, NETDEV_CHANGEADDR, and NETDEV_CHANGENAME.
It results in logging error messages such as these

[   35.315872] bnx2fc: Unknown netevent 5
[   35.315935] bnx2fc: Unknown netevent 8
[   35.353866] bnx2fc: Unknown netevent 10

and generating bug reports.
Remove this ERROR message entirely, rather than demoting it to a
DEBUG or INFO level message.


Signed-by: Shirish Pargaonkar spargaon...@suse.com
---
 drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c 
b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index 1d41f4b..c7388a8 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -856,7 +856,6 @@ static void bnx2fc_indicate_netevent(void *context, 
unsigned long event,
return;

default:
-   printk(KERN_ERR PFX Unknown netevent %ld, event);
return;
}


Thanks for fixing this, Shirish.  It makes sense to suppress the
unwanted netevent messages.

The patch looks good to me, but we should really get the current bnx2fc
maintainer from Qlogic to ACK this.

Acked-by: Eddie Wai eddie@broadcom.com


--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



Looks fine to me as well.

Acked-by: Chad Dupuis chad.dup...@qlogic.com
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[GIT PULL] target updates for v3.16-rc1

2014-06-12 Thread Nicholas A. Bellinger
Hello Linus,

Here are the target-pending updates for v3.16-rc1 code.  Please go ahead
and pull from:

  git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git for-next

Note that you'll need to 'thread the needle' this time around, as there
are no less than four different merge conflicts / build breakages that
have been reported in linux-next (srf CC'ed) over the last weeks.

The first is with virtio-scsi between what has been merged in scsi.git
for "virtio_scsi: use cmd_size", and the "virtio-scsi: Enable DIF/DIX
modes in SCSI host LLD" below.  (Adding Paolo + hch CC')

http://marc.info/?l=linux-kernel&m=140083605208042&w=2

The second is with qla2xxx between what has been merged in scsi.git
for "qla2xxx: T10-Dif: add T10-PI support", and the "qla2xxx: Convert to
percpu_ida session tag pre-allocation" below.  (Adding Quinn CC')

http://marc.info/?l=linux-next&m=140109133803347&w=2

The third is with vhost-scsi between what has been merged in vhost.git
for "vhost: move acked_features to VQs", and the "vhost/scsi: Enable T10
PI IOV -> SGL memory mapping" below.  (Adding MST CC')
 
http://marc.info/?l=linux-next&m=140237354814532&w=2

And the last is with virtio-scsi between what has been merged in
virtio.git for "virtio_scsi: don't call virtqueue_add_sgs(... GFP_NOIO)
holding spinlock.", and the "virtio-scsi: Enable DIF/DIX modes in SCSI
host LLD" below.  (Adding Rusty CC')

http://marc.info/?l=linux-next&m=140254666208366&w=2

All the conflicts aside, the highlights this round include:

  - Add support for T10 PI pass-through between vhost-scsi + virtio-scsi
(MST + Paolo + MKP + nab)
  - Add support for T10 PI in qla2xxx target mode (Quinn + MKP + hch + nab,
merged through scsi.git)
  - Add support for percpu-ida pre-allocation in qla2xxx target code
(Quinn + nab)
  - A number of iser-target fixes related to hardening the network portal
shutdown path (Sagi + Slava)
  - Fix response length residual handling for a number of control CDBs
(Roland + Christophe V.)
  - Various iscsi RFC conformance fixes in the CHAP authentication path
(Tejas and Calsoft folks + nab)
  - Return TASK_SET_FULL status for tcm_fc(FCoE) DataIn + Response failures
(Vasu + Jun + nab)
  - Fix long-standing ABORT_TASK + session reset hang (nab)
  - Convert iser-initiator + iser-target to include T10 bytes into EDTL
(Sagi + Or + MKP + Mike Christie)
  - Fix NULL pointer dereference regression related to XCOPY introduced
in v3.15 + CC'ed to v3.12.y (nab)

Thank you,

--nab

Christophe Vu-Brugier (4):
  target/spc: Simplify INQUIRY EVPD=0x80
  target: cleanup some boolean tests
  target/sbc: Remove sbc_check_valid_sectors()
  target/sbc: Check that the LBA and number of blocks are correct in
VERIFY

Nicholas Bellinger (17):
  virtio-scsi.h: Add virtio_scsi_cmd_req_pi + VIRTIO_SCSI_F_T10_PI bits
  vhost/scsi: Move sanity check into vhost_scsi_map_iov_to_sgl
  vhost/scsi: Add preallocation of protection SGLs
  vhost/scsi: Add T10 PI IOV -> SGL memory mapping logic
  vhost/scsi: Enable T10 PI IOV -> SGL memory mapping
  virtio-scsi: Enable DIF/DIX modes in SCSI host LLD
  qla2xxx: Convert to percpu_ida session tag pre-allocation
  iscsi-target: Reject zero-length payloads during SecurityNegotiation
  iscsi-target: Remove no-op from iscsit_tpg_del_portal_group
  iscsi-target: Reject mutual authentication with reflected CHAP_C
  tcm_fc: Generate TASK_SET_FULL status for DataIN failures
  tcm_fc: Generate TASK_SET_FULL status for response failures
  target: Set CMD_T_ACTIVE bit for Task Management Requests
  target: Use complete_all for se_cmd->t_transport_stop_comp
  iscsi-target: Fix ABORT_TASK + connection reset iscsi_queue_req
memory leak
  vhost-scsi: Include prot_bytes into expected data transfer length
  target: Fix NULL pointer dereference for XCOPY in target_put_sess_cmd

Roland Dreier (2):
  iscsi-target: Put length of failed allocation in error message
  target: Report correct response length for some commands

Sagi Grimberg (10):
  Target/iser: Bail from accept_np if np_thread is trying to close
  Target/iser: Fix hangs in connection teardown
  Target/iser: Improve cm events handling
  Target/iser: Wait for proper cleanup before unloading
  Target/iser: Gracefully reject T10-PI enabled connect request if not
supported
  Target/iser: Fix a wrong dereference in case discovery session is
over iser
  Target/iscsi: Fix sendtargets response pdu for iser transport
  scsi_cmnd: Introduce scsi_transfer_length helper
  libiscsi, iser: Adjust data_length to include protection information
  TARGET/sbc,loopback: Adjust command data length in case pi exists on
the wire

Tejas Vaykole (1):
  iscsi-target: Fix CHAP_A parameter list handling

 drivers/infiniband/ulp/iser/iser_initiator.c   |   34 +--
 drivers/infiniband/ulp/isert/ib_isert.c|   70 +++---
 drivers/infiniband/ulp/isert/ib_isert.h|2 +-
 drivers/scsi/Kconfig   |1 +
 drivers/scsi/libiscsi.c   

[PATCH v4] sg: O_EXCL and other lock handling

2014-06-12 Thread Douglas Gilbert

This is a re-presentation of a patch to the sg driver
whose v3 was sent in November 2013:
  http://www.spinics.net/lists/linux-scsi/msg69957.html

It addresses a problem reported by Vaughan Cao concerning
the correctness of the O_EXCL logic in the sg driver. POSIX
doesn't define O_EXCL semantics on devices but "allow only
one open file descriptor at a time per sg device" is a rough
definition. The sg driver's semantics have been to wait
on an open() when O_NONBLOCK is not given and there are
O_EXCL headwinds. Nasty things can happen during that wait
such as the device being detached (removed). So multiple
locks are reworked in this patch making it large and hard
to break down into digestible bits.

This patch is against Linus's current git repository which
doesn't include any sg patches sent in the last few weeks.
Hence this patch touches as little as possible that it
doesn't need to and strips out most SCSI_LOG_TIMEOUT()
changes in v3 because Hannes said he was going to rework all
that stuff.

The sg3_utils package has several test programs written to
test this patch. See examples/sg_tst_excl*.cpp .

Not all the locks and flags in sg have been re-worked in
this patch, notably sg_request::done . That can wait for
a follow-up patch if this one meets with approval.

ChangeLog v4:
  - based on the current kernel tree: pre 3.16-rc1
  - strip out clean-ups in v3 that others are better
placed to do (e.g. debug/logging)
  - simplify open_wait_event() logic and add comment

ChangeLog v3 and earlier: see link in the first paragraph.


Could anyone confirm whether v3 of this patch has found its
way into any distro and/or been tested or used more widely?

Signed-off-by: Douglas Gilbert dgilb...@interlog.com


diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 53268aa..fc10ed5 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -51,6 +51,7 @@ static int sg_version_num = 30534;	/* 2 digits for each component */
 #include linux/delay.h
 #include linux/blktrace_api.h
 #include linux/mutex.h
+#include linux/atomic.h
 #include linux/ratelimit.h
 
 #include scsi.h
@@ -102,18 +103,16 @@ static int scatter_elem_sz_prev = SG_SCATTER_SZ;
 
 #define SG_SECTOR_SZ 512
 
-static int sg_add(struct device *, struct class_interface *);
-static void sg_remove(struct device *, struct class_interface *);
-
-static DEFINE_SPINLOCK(sg_open_exclusive_lock);
+static int sg_add_device(struct device *, struct class_interface *);
+static void sg_remove_device(struct device *, struct class_interface *);
 
 static DEFINE_IDR(sg_index_idr);
 static DEFINE_RWLOCK(sg_index_lock);	/* Also used to lock
 			   file descriptor list for device */
 
 static struct class_interface sg_interface = {
-	.add_dev	= sg_add,
-	.remove_dev	= sg_remove,
+	.add_dev= sg_add_device,
+	.remove_dev = sg_remove_device,
 };
 
 typedef struct sg_scatter_hold { /* holding area for scsi scatter gather info */
@@ -146,8 +145,7 @@ typedef struct sg_request {	/* SG_MAX_QUEUE requests outstanding per file */
 } Sg_request;
 
 typedef struct sg_fd {		/* holds the state of a file descriptor */
-	/* sfd_siblings is protected by sg_index_lock */
-	struct list_head sfd_siblings;
+	struct list_head sfd_siblings;  /* protected by device's sfd_lock */
 	struct sg_device *parentdp;	/* owning device */
 	wait_queue_head_t read_wait;	/* queue read until command done */
 	rwlock_t rq_list_lock;	/* protect access to list in req_arr */
@@ -170,14 +168,15 @@ typedef struct sg_fd {		/* holds the state of a file descriptor */
 
 typedef struct sg_device { /* holds the state of each scsi generic device */
 	struct scsi_device *device;
-	wait_queue_head_t o_excl_wait;	/* queue open() when O_EXCL in use */
+	wait_queue_head_t open_wait;/* queue open() when O_EXCL present */
+	struct mutex open_rel_lock; /* held when in open() or release() */
 	int sg_tablesize;	/* adapter's max scatter-gather table size */
 	u32 index;		/* device index number */
-	/* sfds is protected by sg_index_lock */
 	struct list_head sfds;
-	volatile char detached;	/* 0-attached, 1-detached pending removal */
-	/* exclude protected by sg_open_exclusive_lock */
-	char exclude;		/* opened for exclusive access */
+	rwlock_t sfd_lock;  /* protect access to sfd list */
+	atomic_t detaching; /* 0-device usable, 1-device detaching */
+	atomic_t exclude;   /* 1-open(O_EXCL) succeeded and is active */
+	atomic_t open_cnt;  /* count of opens (perhaps  num(sfds) ) */
 	char sgdebug;		/* 0-off, 1-sense, 9-dump dev, 10- all devs */
 	struct gendisk *disk;
 	struct cdev * cdev;	/* char_dev [sysfs: /sys/cdev/major/sgn] */
@@ -208,7 +207,7 @@ static Sg_request *sg_add_request(Sg_fd * sfp);
 static int sg_remove_request(Sg_fd * sfp, Sg_request * srp);
 static int sg_res_in_use(Sg_fd * sfp);
 static Sg_device *sg_get_dev(int dev);
-static void sg_put_dev(Sg_device *sdp);
+static void sg_device_destroy(struct kref *kref);
 
 #define SZ_SG_HEADER sizeof(struct sg_header)
 #define 

[Announce] sg3_utils-1.39 available

2014-06-12 Thread Douglas Gilbert

sg3_utils is a package of command line utilities for sending
SCSI and some ATA commands to devices. This package targets
the Linux 3, 2.6 and 2.4 kernel series. It also has ports to
FreeBSD, Tru64, Solaris, and Windows (cygwin and MinGW).

There are usually around 6 months between versions of this
package but this time there is only 2.5 months due to the
volume of changes and additions, see the ChangeLog below.
This version tracks various changes made by www.t10.org
since April 2014.

For an overview of sg3_utils and downloads see this page:
http://sg.danny.cz/sg/sg3_utils.html
The sg_ses utility (for enclosure devices) is discussed at:
http://sg.danny.cz/sg/sg_ses.html
A full changelog can be found at:
http://sg.danny.cz/sg/p/sg3_utils.ChangeLog

A release announcement will be sent to freecode.com .


Changelog for sg3_utils-1.39 [20140612] [svn: r588]
  - sg_rep_zones: new utility for ZBC REPORT ZONES
  - sg_reset_wp: new utility, ZBC RESET WRITE POINTER
  - sg_ses: add --eiioe=auto|force option
- fix AES dpage element indexing problems
- add --readonly option
  - sg_write_buffer: add --bpw=CS option to call
write buffer multiple times for big blobs
  - sg_format: add --ip_def option to fully provision
  - sg_opcodes: add --mask option
  - sg_logs: add --in=FN option for log select params
- add --filter=PARC (parameter code)
- add --no_inq to suppress initial INQUIRY call
- add --readonly option
  - sg_persist: add --readonly option, environment
variable SG_PERSIST_IN_RDONLY sets ro on prin cmds
  - sg_inq: sync version descriptors dated 20105176
- suppress dev-id VPD messages so they only appear
  when --verbose is given
- add new SCSI_IDENT_*_ATA pair to --export output
  - sg_luns: add decoding for conglomerate LUNS
- add --lu_cong option to simulate the LU_CONG bit
  - sg_vpd: add --vendor=VP option, re-order vendor
specific pages, split lto into lto5 and lto6
- add Supported block lengths and protection types
  page (sbc4r01)
- add Block device characteristics extension
  page (sbc4r02)
  - sg_copy_results, sg_get_lba_status, sg_luns,
sg_read_buffer, sg_readcap, sg_referrals, sg_rtpg,
sg_sat_set_features, sg_sat_identify:
add --readonly option
  - sginfo: strip trailing spaces from INQUIRY text
  - sg_rbuf: add --echo option (to use echo buffer)
  - sg_lib: add sanitize command service action names
- add 'sense' categories for reservation conflict,
  data protect and protection information violations
- add sg_get_category_sense_str() to API
- change struct sg_simple_inquiry_resp::rmb to byte_1
- add initial zbc service actions
- dStrHex(Err): fix output truncation error
- linux, sg: support SCSI_PT_FLAGS_QUEUE_AT_TAIL and
  SCSI_PT_FLAGS_QUEUE_AT_HEAD (block layer queueing)
  - sg_lib_data: sync asc/ascq codes with T10 20140516
- sync operation code with T10 20140515
- add id string for SPC-5
  - scripts/59-scsi-sg3_utils.rules: removed
- functionality split into two scripts:
  55-scsi-sg3_id.rules + 58-scsi-sg3_symlink.rules
  - examples/sg_persist_tst.sh: add --exclusive option
  - win32: sg_scan, sg_ses and sg_log fixes
  - examples/sgq_dd: re-add old utility as example

Changelog for sg3_utils-1.38 [20140401] [svn: r563]


Doug Gilbert
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html