Re: [PATCH] libibmad: Add support for PortSamplesResult

2013-02-15 Thread Ira Weiny
On Fri, 18 Jan 2013 18:05:21 -0500
Hal Rosenstock  wrote:

> 
> Signed-off-by: Hal Rosenstock 

Thanks, applied,
Ira

> ---
> diff --git a/include/infiniband/mad.h b/include/infiniband/mad.h
> index 0694dc4..e6de2c5 100644
> --- a/include/infiniband/mad.h
> +++ b/include/infiniband/mad.h
> @@ -1230,6 +1230,28 @@ enum MAD_FIELDS {
>   IB_SA_NR_NODEDESC_F,
>   IB_SA_NR_LAST_F,
>  
> + /*
> +  * PortSamplesResult fields
> +  */
> + IB_PSR_TAG_F,
> + IB_PSR_SAMPLE_STATUS_F,
> + IB_PSR_COUNTER0_F,
> + IB_PSR_COUNTER1_F,
> + IB_PSR_COUNTER2_F,
> + IB_PSR_COUNTER3_F,
> + IB_PSR_COUNTER4_F,
> + IB_PSR_COUNTER5_F,
> + IB_PSR_COUNTER6_F,
> + IB_PSR_COUNTER7_F,
> + IB_PSR_COUNTER8_F,
> + IB_PSR_COUNTER9_F,
> + IB_PSR_COUNTER10_F,
> + IB_PSR_COUNTER11_F,
> + IB_PSR_COUNTER12_F,
> + IB_PSR_COUNTER13_F,
> + IB_PSR_COUNTER14_F,
> + IB_PSR_LAST_F,
> +
>   IB_FIELD_LAST_  /* must be last */
>  };
>  
> @@ -1589,7 +1611,7 @@ MAD_EXPORT ib_mad_dump_fn
>  mad_dump_cc_switchportcongestionsettingelement, 
> mad_dump_cc_cacongestionsetting,
>  mad_dump_cc_cacongestionentry, mad_dump_cc_congestioncontroltable,
>  mad_dump_cc_congestioncontroltableentry, mad_dump_cc_timestamp,
> -mad_dump_classportinfo;
> +mad_dump_classportinfo, mad_dump_portsamples_result;
>  
>  MAD_EXPORT void mad_dump_fields(char *buf, int bufsz, void *val, int valsz,
>   int start, int end);
> diff --git a/src/dump.c b/src/dump.c
> index 9cf31da..7f3ef7d 100644
> --- a/src/dump.c
> +++ b/src/dump.c
> @@ -851,6 +851,11 @@ void mad_dump_portsamples_control(char *buf, int bufsz, 
> void *val, int valsz)
>   _dump_fields(buf, bufsz, val, IB_PSC_OPCODE_F, IB_PSC_LAST_F);
>  }
>  
> +void mad_dump_portsamples_result(char *buf, int bufsz, void *val, int valsz)
> +{
> + _dump_fields(buf, bufsz, val, IB_PSR_TAG_F, IB_PSR_LAST_F);
> +}
> +
>  void mad_dump_port_ext_speeds_counters(char *buf, int bufsz, void *val, int 
> valsz)
>  {
>   _dump_fields(buf, bufsz, val, IB_PESC_PORT_SELECT_F, IB_PESC_LAST_F);
> diff --git a/src/fields.c b/src/fields.c
> index 06bc9f0..d2b6792 100644
> --- a/src/fields.c
> +++ b/src/fields.c
> @@ -927,6 +927,28 @@ static const ib_field_t ib_mad_f[] = {
>   {352, 64 * 8, "NodeDesc", mad_dump_string},
>   {0, 0}, /* IB_SA_NR_LAST_F */
>  
> + /*
> +  * PortSamplesResult fields
> +  */
> + {BITSOFFS(0, 16), "Tag", mad_dump_hex},
> + {BITSOFFS(30, 2), "SampleStatus", mad_dump_hex},
> + {32, 32, "Counter0", mad_dump_uint},
> + {64, 32, "Counter1", mad_dump_uint},
> + {96, 32, "Counter2", mad_dump_uint},
> + {128, 32, "Counter3", mad_dump_uint},
> + {160, 32, "Counter4", mad_dump_uint},
> + {192, 32, "Counter5", mad_dump_uint},
> + {224, 32, "Counter6", mad_dump_uint},
> + {256, 32, "Counter7", mad_dump_uint},
> + {288, 32, "Counter8", mad_dump_uint},
> + {320, 32, "Counter9", mad_dump_uint},
> + {352, 32, "Counter10", mad_dump_uint},
> + {384, 32, "Counter11", mad_dump_uint},
> + {416, 32, "Counter12", mad_dump_uint},
> + {448, 32, "Counter13", mad_dump_uint},
> + {480, 32, "Counter14", mad_dump_uint},
> + {0, 0}, /* IB_PSR_LAST_F */
> +
>   {0, 0}  /* IB_FIELD_LAST_ */
>  
>  };
> diff --git a/src/libibmad.map b/src/libibmad.map
> index a4d4418..a1988be 100644
> --- a/src/libibmad.map
> +++ b/src/libibmad.map
> @@ -35,6 +35,7 @@ IBMAD_1.3 {
>   mad_dump_portcapmask;
>   mad_dump_portinfo;
>   mad_dump_portsamples_control;
> + mad_dump_portsamples_result;
>   mad_dump_perfcounters_port_op_rcv_counters;
>   mad_dump_perfcounters_port_flow_ctl_counters;
>   mad_dump_perfcounters_port_vl_op_packet;
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


-- 
Ira Weiny
Member of Technical Staff
Lawrence Livermore National Lab
925-423-8008
wei...@llnl.gov
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: NFS over RDMA crashing

2013-02-15 Thread J. Bruce Fields
On Mon, Feb 11, 2013 at 03:19:42PM +, Yan Burman wrote:
> > -Original Message-
> > From: J. Bruce Fields [mailto:bfie...@fieldses.org]
> > Sent: Thursday, February 07, 2013 18:42
> > To: Yan Burman
> > Cc: linux-...@vger.kernel.org; sw...@opengridcomputing.com; linux-
> > r...@vger.kernel.org; Or Gerlitz
> > Subject: Re: NFS over RDMA crashing
> > 
> > On Wed, Feb 06, 2013 at 05:24:35PM -0500, J. Bruce Fields wrote:
> > > On Wed, Feb 06, 2013 at 05:48:15PM +0200, Yan Burman wrote:
> > > > When killing mount command that got stuck:
> > > > ---
> > > >
> > > > BUG: unable to handle kernel paging request at 880324dc7ff8
> > > > IP: [] rdma_read_xdr+0x8bb/0xd40 [svcrdma] PGD
> > > > 1a0c063 PUD 32f82e063 PMD 32f2fd063 PTE 800324dc7161
> > > > Oops: 0003 [#1] PREEMPT SMP
> > > > Modules linked in: md5 ib_ipoib xprtrdma svcrdma rdma_cm ib_cm
> > iw_cm
> > > > ib_addr nfsd exportfs netconsole ip6table_filter ip6_tables
> > > > iptable_filter ip_tables ebtable_nat nfsv3 nfs_acl ebtables x_tables
> > > > nfsv4 auth_rpcgss nfs lockd autofs4 sunrpc target_core_iblock
> > > > target_core_file target_core_pscsi target_core_mod configfs 8021q
> > > > bridge stp llc ipv6 dm_mirror dm_region_hash dm_log vhost_net
> > > > macvtap macvlan tun uinput iTCO_wdt iTCO_vendor_support kvm_intel
> > > > kvm crc32c_intel microcode pcspkr joydev i2c_i801 lpc_ich mfd_core
> > > > ehci_pci ehci_hcd sg ioatdma ixgbe mdio mlx4_ib ib_sa ib_mad ib_core
> > > > mlx4_en mlx4_core igb hwmon dca ptp pps_core button dm_mod ext3
> > jbd
> > > > sd_mod ata_piix libata uhci_hcd megaraid_sas scsi_mod CPU 6
> > > > Pid: 4744, comm: nfsd Not tainted 3.8.0-rc5+ #4 Supermicro
> > > > X8DTH-i/6/iF/6F/X8DTH
> > > > RIP: 0010:[]  []
> > > > rdma_read_xdr+0x8bb/0xd40 [svcrdma]
> > > > RSP: 0018:880324c3dbf8  EFLAGS: 00010297
> > > > RAX: 880324dc8000 RBX: 0001 RCX: 880324dd8428
> > > > RDX: 880324dc7ff8 RSI: 880324dd8428 RDI: 81149618
> > > > RBP: 880324c3dd78 R08: 60f9c860 R09: 0001
> > > > R10: 880324dd8000 R11: 0001 R12: 8806299dcb10
> > > > R13: 0003 R14: 0001 R15: 0010
> > > > FS:  () GS:88063fc0()
> > > > knlGS:
> > > > CS:  0010 DS:  ES:  CR0: 8005003b
> > > > CR2: 880324dc7ff8 CR3: 01a0b000 CR4: 07e0
> > > > DR0:  DR1:  DR2: 
> > > > DR3:  DR6: 0ff0 DR7: 0400
> > > > Process nfsd (pid: 4744, threadinfo 880324c3c000, task
> > > > 88033055)
> > > > Stack:
> > > >  880324c3dc78 880324c3dcd8 0282 880631cec000
> > > >  880324dd8000 88062ed33040 000124c3dc48 880324dd8000
> > > >  88062ed33058 880630ce2b90 8806299e8000 0003
> > > > Call Trace:
> > > >  [] svc_rdma_recvfrom+0x3ee/0xd80 [svcrdma]
> > > > [] ? try_to_wake_up+0x2f0/0x2f0
> > > > [] svc_recv+0x3ef/0x4b0 [sunrpc]
> > > > [] ? nfsd_svc+0x740/0x740 [nfsd]
> > > > [] nfsd+0xad/0x130 [nfsd]  [] ?
> > > > nfsd_svc+0x740/0x740 [nfsd]  [] kthread+0xd6/0xe0
> > > > [] ? __init_kthread_worker+0x70/0x70
> > > > [] ret_from_fork+0x7c/0xb0  [] ?
> > > > __init_kthread_worker+0x70/0x70
> > > > Code: 63 c2 49 8d 8c c2 18 02 00 00 48 39 ce 77 e1 49 8b 82 40 0a 00
> > > > 00 48 39 c6 0f 84 92 f7 ff ff 90 48 8d 50 f8 49 89 92 40 0a 00 00
> > > > <48> c7 40 f8 00 00 00 00 49 8b 82 40 0a 00 00 49 3b 82 30 0a 00 RIP
> > > > [] rdma_read_xdr+0x8bb/0xd40 [svcrdma]  RSP
> > > > 
> > > > CR2: 880324dc7ff8
> > > > ---[ end trace 06d0384754e9609a ]---
> > > >
> > > >
> > > > It seems that commit afc59400d6c65bad66d4ad0b2daf879cbff8e23e
> > > > "nfsd4: cleanup: replace rq_resused count by rq_next_page pointer"
> > > > is responsible for the crash (it seems to be crashing in
> > > > net/sunrpc/xprtrdma/svc_rdma_recvfrom.c:527)
> > > > It may be because I have CONFIG_DEBUG_SET_MODULE_RONX and
> > > > CONFIG_DEBUG_RODATA enabled. I did not try to disable them yet.
> > > >
> > > > When I moved to commit 79f77bf9a4e3dd5ead006b8f17e7c4ff07d8374e I
> > > > was no longer getting the server crashes, so the rest of my tests
> > > > were done using that point (it is somewhere in the middle of
> > > > 3.7.0-rc2).
> > >
> > > OK, so this part's clearly my fault--I'll work on a patch, but the
> > > rdma's use of the ->rq_pages array is pretty confusing.
> > 
> > Does this help?
> > 
> > They must have added this for some reason, but I'm not seeing how it could
> > have ever done anything
> > 
> > --b.
> > 
> > diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> > b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> > index 0ce7552..e8f25ec 100644
> > --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> > +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
> > @@ -520,13 +520,6 @@ next_sge:
> > for (ch_no = 0; &rqst

[PATCH] IB/srp: Fail I/O requests if the transport is offline

2013-02-15 Thread Bart Van Assche
If an SRP target is no longer reachable and srp_reset_host()
fails to reconnect then ib_srp will invoke scsi_remove_host().
That function will invoke __scsi_remove_device() for each LUN.
And that last function will change the device state from
SDEV_TRANSPORT_OFFLINE into SDEV_CANCEL. Certain user space
software, e.g. older versions of multipathd, continues queueing
I/O to SCSI devices that are in the SDEV_CANCEL state. If these
I/O requests are submitted as SG_IO that means that the
REQ_PREEMPT flag will be set and hence that these requests will
be passed to srp_queuecommand(). These requests will time out.
If new requests are queued fast enough from user space these
active requests will prevent __scsi_remove_device() from finishing.
Avoid this by failing I/O requests in the SDEV_CANCEL state if
the transport is offline. Introduce a new variable to keep
track of the transport state instead of failing requests if
(!target->connected || target->qp_in_error) such that the SCSI
error handler has a chance to retry commands after a transport
layer failure occurred.

Signed-off-by: Bart Van Assche 
Cc: David Dillow 
Cc: Or Gerlitz 
Cc: Vu Pham 
---
 drivers/infiniband/ulp/srp/ib_srp.c |7 +++
 drivers/infiniband/ulp/srp/ib_srp.h |1 +
 2 files changed, 8 insertions(+)

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c 
b/drivers/infiniband/ulp/srp/ib_srp.c
index 8a7eb9f..b34752d 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -734,6 +734,7 @@ static int srp_reconnect_target(struct srp_target_port 
*target)
 
scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING :
SDEV_TRANSPORT_OFFLINE);
+   target->transport_offline = ret != 0;
 
if (ret)
goto err;
@@ -1353,6 +1354,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, 
struct scsi_cmnd *scmnd)
unsigned long flags;
int len;
 
+   if (unlikely(target->transport_offline)) {
+   scmnd->result = DID_NO_CONNECT << 16;
+   scmnd->scsi_done(scmnd);
+   return 0;
+   }
+
spin_lock_irqsave(&target->lock, flags);
iu = __srp_get_tx_iu(target, SRP_IU_CMD);
if (!iu)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h 
b/drivers/infiniband/ulp/srp/ib_srp.h
index de2d0b3..66fbedd 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -140,6 +140,7 @@ struct srp_target_port {
unsigned intcmd_sg_cnt;
unsigned intindirect_size;
boolallow_ext_sg;
+   booltransport_offline;
 
/* Everything above this point is used in the hot path of
 * command processing. Try to keep them packed into cachelines.
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html