Re: [PATCH] infiniband-diags/src/ibccquery.c: Fix CACongestionSetting inputs
On Thu, 26 Apr 2012 14:43:13 -0700 Albert Chu wrote: > Make inputs to CACongestionSetting more like MAD packet, allowing > multiple SLs to be configured at one time. > > Signed-off-by: Albert Chu Thanks applied, Ira > --- > man/ibccconfig.8 |4 +- > src/ibccconfig.c | 61 - > 2 files changed, 30 insertions(+), 35 deletions(-) > > diff --git a/man/ibccconfig.8 b/man/ibccconfig.8 > index a0bea2d..38af5fd 100644 > --- a/man/ibccconfig.8 > +++ b/man/ibccconfig.8 > @@ -24,7 +24,7 @@ Current supported operations and their parameters: >CongestionKeyInfo (CK) > >SwitchCongestionSetting (SS) > > >SwitchPortCongestionSetting (SP) > > - CACongestionSetting (CS) > > + CACongestionSetting (CS) > >CongestionControlTable (CT) > ... > > .TP > @@ -75,7 +75,7 @@ attempted to be fulfilled, and will fail if it is not > possible. > .PP > ibccconfig SwitchCongestionSetting 2 0x1F 0x1F 0x0 0xF 8 0 0:0 1 > # Configure Switch Congestion Settings > .PP > -ibccconfig CACongestionSetting 1 0 0x3 0 150 1 0 0 # Configure CA > Congestion Settings > +ibccconfig CACongestionSetting 1 0 0x3 150 1 0 0 # Configure CA > Congestion Settings > .PP > ibccconfig CongestionControlTable 1 63 0 0:0 0:1 ...# Configure > first block of Congestion Control Table > .PP > diff --git a/src/ibccconfig.c b/src/ibccconfig.c > index c81b7fa..4ae5386 100644 > --- a/src/ibccconfig.c > +++ b/src/ibccconfig.c > @@ -80,7 +80,7 @@ static const match_rec_t match_tbl[] = { > {"SwitchPortCongestionSetting", "SP", switch_port_congestion_setting, 1, >" > "}, > {"CACongestionSetting", "CS", ca_congestion_setting, 0, > - " " > + "" >" "}, > {"CongestionControlTable", "CT", congestion_control_table, 0, >"..."}, > @@ -436,15 +436,14 @@ static char *ca_congestion_setting(ib_portid_t * dest, > char **argv, int argc) > uint8_t payload[IB_CC_DATA_SZ] = { 0 }; > uint32_t port_control; > uint32_t control_map; > - uint32_t sl; > uint32_t ccti_timer; > uint32_t ccti_increase; > uint32_t trigger_threshold; > uint32_t ccti_min; > - uint8_t *ptr; 
> char *errstr; > + int i; > > - if (argc != 7) > + if (argc != 6) > return "invalid number of parameters for CACongestionSetting"; > > if ((errstr = parseint(argv[0], &port_control, 0))) > @@ -453,29 +452,18 @@ static char *ca_congestion_setting(ib_portid_t * dest, > char **argv, int argc) > if ((errstr = parseint(argv[1], &control_map, 0))) > return errstr; > > - if ((errstr = parseint(argv[2], &sl, 0))) > - return errstr; > - > - if ((errstr = parseint(argv[3], &ccti_timer, 0))) > + if ((errstr = parseint(argv[2], &ccti_timer, 0))) > return errstr; > > - if ((errstr = parseint(argv[4], &ccti_increase, 0))) > + if ((errstr = parseint(argv[3], &ccti_increase, 0))) > return errstr; > > - if ((errstr = parseint(argv[5], &trigger_threshold, 0))) > + if ((errstr = parseint(argv[4], &trigger_threshold, 0))) > return errstr; > > - if ((errstr = parseint(argv[6], &ccti_min, 0))) > + if ((errstr = parseint(argv[5], &ccti_min, 0))) > return errstr; > > - if (sl > 15) > - return "invalid SL specified"; > - > - /* We are modifying only 1 SL at a time, so get the current config */ > - if (!cc_query_status_via(payload, dest, > IB_CC_ATTR_CA_CONGESTION_SETTING, > - 0, 0, NULL, srcport, cckey)) > - return "ca congestion setting query failed"; > - > mad_encode_field(payload, >IB_CC_CA_CONGESTION_SETTING_PORT_CONTROL_F, >&port_control); > @@ -484,23 +472,30 @@ static char *ca_congestion_setting(ib_portid_t * dest, > char **argv, int argc) >IB_CC_CA_CONGESTION_SETTING_CONTROL_MAP_F, >&control_map); > > - ptr = payload + 2 + 2 + sl * 8; > + for (i = 0; i < 16; i++) { > + uint8_t *ptr; > > - mad_encode_field(ptr, > - IB_CC_CA_CONGESTION_ENTRY_CCTI_TIMER_F, > - &ccti_timer); > + if (!(control_map & (0x1 << i))) > + continue; > > - mad_encode_field(ptr, > - IB_CC_CA_CONGESTION_ENTRY_CCTI_INCREASE_F, > - &ccti_increase); > + ptr = payload + 2 + 2 + i * 8; > > - mad_encode_field(ptr, > - IB_CC_CA_CONGESTION_ENTRY_TRIGGER_THRESHOLD_F, > - &trigger_threshold); > + mad_encode_field(ptr, > + 
IB_CC_CA_CONGESTION_ENTRY_CCTI_TIMER_F, > + &
[PATCH] infiniband-diags: remove SMI class registration where possible
Signed-off-by: Ira Weiny --- src/ibccconfig.c|5 ++--- src/ibccquery.c |5 ++--- src/ibdiag_common.c |4 src/ibping.c|5 ++--- src/perfquery.c |6 ++ src/vendstat.c |6 ++ 6 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/ibccconfig.c b/src/ibccconfig.c index 71d408a..0aae5c5 100644 --- a/src/ibccconfig.c +++ b/src/ibccconfig.c @@ -584,8 +584,7 @@ static int process_opt(void *context, int ch, char *optarg) int main(int argc, char **argv) { char usage_args[1024]; - int mgmt_classes[4] = - { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS, IB_CC_CLASS }; + int mgmt_classes[3] = { IB_SMI_CLASS, IB_SA_CLASS, IB_CC_CLASS }; ib_portid_t portid = { 0 }; char *err; op_fn_t *fn; @@ -632,7 +631,7 @@ int main(int argc, char **argv) if (!(fn = match_op(argv[0]))) IBERROR("operation '%s' not supported", argv[0]); - srcport = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 4); + srcport = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 3); if (!srcport) IBERROR("Failed to open '%s' port '%d'", ibd_ca, ibd_ca_port); diff --git a/src/ibccquery.c b/src/ibccquery.c index 2bf62fa..acdfe46 100644 --- a/src/ibccquery.c +++ b/src/ibccquery.c @@ -370,8 +370,7 @@ static int process_opt(void *context, int ch, char *optarg) int main(int argc, char **argv) { char usage_args[1024]; - int mgmt_classes[4] = - { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS, IB_CC_CLASS }; + int mgmt_classes[3] = { IB_SMI_CLASS, IB_SA_CLASS, IB_CC_CLASS }; ib_portid_t portid = { 0 }; char *err; op_fn_t *fn; @@ -412,7 +411,7 @@ int main(int argc, char **argv) if (!(fn = match_op(argv[0]))) IBERROR("operation '%s' not supported", argv[0]); - srcport = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 4); + srcport = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 3); if (!srcport) IBERROR("Failed to open '%s' port '%d'", ibd_ca, ibd_ca_port); diff --git a/src/ibdiag_common.c b/src/ibdiag_common.c index d53398b..a4e8cfa 100644 --- a/src/ibdiag_common.c +++ b/src/ibdiag_common.c @@ 
-598,6 +598,10 @@ int resolve_guid(char *ca_name, uint8_t ca_port, ib_portid_t *portid, return 0; } +/* + * Callers of this function should ensure their ibmad_port has been opened with + * IB_SA_CLASS as this function may require the SA to resolve addresses. + */ int resolve_portid_str(char *ca_name, uint8_t ca_port, ib_portid_t * portid, char *addr_str, enum MAD_DEST dest_type, ib_portid_t *sm_id, const struct ibmad_port *srcport) diff --git a/src/ibping.c b/src/ibping.c index 9d5c54b..205aa82 100644 --- a/src/ibping.c +++ b/src/ibping.c @@ -188,8 +188,7 @@ static int process_opt(void *context, int ch, char *optarg) int main(int argc, char **argv) { - int mgmt_classes[3] = - { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS }; + int mgmt_classes[1] = { IB_SA_CLASS }; int ping_class = IB_VENDOR_OPENIB_PING_CLASS; uint64_t rtt; char *err; @@ -212,7 +211,7 @@ int main(int argc, char **argv) if (!argc && !server) ibdiag_show_usage(); - srcport = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 3); + srcport = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 1); if (!srcport) IBERROR("Failed to open '%s' port '%d'", ibd_ca, ibd_ca_port); diff --git a/src/perfquery.c b/src/perfquery.c index 8835d3d..b58ad70 100644 --- a/src/perfquery.c +++ b/src/perfquery.c @@ -656,9 +656,7 @@ static int process_opt(void *context, int ch, char *optarg) int main(int argc, char **argv) { - int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS, - IB_PERFORMANCE_CLASS - }; + int mgmt_classes[2] = { IB_SA_CLASS, IB_PERFORMANCE_CLASS }; ib_portid_t portid = { 0 }; int mask = 0x; uint64_t ext_mask = 0x; @@ -725,7 +723,7 @@ int main(int argc, char **argv) mask = ext_mask; } - srcport = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 4); + srcport = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 2); if (!srcport) IBERROR("Failed to open '%s' port '%d'", ibd_ca, ibd_ca_port); diff --git a/src/vendstat.c b/src/vendstat.c index 413d202..78f6d7d 100644 --- 
a/src/vendstat.c +++ b/src/vendstat.c @@ -303,9 +303,7 @@ static int process_opt(void *context, int ch, char *optarg) int main(int argc, char **argv) { - int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS, - IB_MLX_VENDOR_CLASS - }; + int mgmt_classes[2] = { IB_SA_CLASS, IB_MLX_VENDOR_CLASS }; ib_portid_t portid = { 0
[PATCH] infiniband-diags: remove mad.h from ibstat
ibstat does not require libibmad directly. The change to ibdiag_common was required because of the conflict between the define in ib_types.h and enum in mad.h which presented itself after the fix in ibstat. Signed-off-by: Ira Weiny --- include/ibdiag_common.h |2 +- src/ibstat.c|1 - 2 files changed, 1 insertions(+), 2 deletions(-) diff --git a/include/ibdiag_common.h b/include/ibdiag_common.h index 09dc319..6645322 100644 --- a/include/ibdiag_common.h +++ b/include/ibdiag_common.h @@ -39,8 +39,8 @@ #ifndef _IBDIAG_COMMON_H_ #define _IBDIAG_COMMON_H_ -#include #include +#include #include extern int ibverbose; diff --git a/src/ibstat.c b/src/ibstat.c index ade5d7a..f953cf4 100644 --- a/src/ibstat.c +++ b/src/ibstat.c @@ -50,7 +50,6 @@ #include #include -#include #include -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH fixes] IB/core: fix generation of unmatched responses for unhandled MADs
On Sat, Apr 28, 2012 at 10:40 PM, Or Gerlitz wrote: > yep, makes sense, so what's in CT - is it "consider that"? I'm tempted to send a lmgtfy.com link, but afaict == as far as I can tell. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] libibmad/fields.c: Update counter offsets for PortExtendedSpeedsCounters
On Mon, 30 Apr 2012 13:26:12 -0400 Hal Rosenstock wrote: > > Per MgtWG RefID #4841 > > SyncHeaderErrorCounter, UnknownBlockCounter, and ErrorDetectionCounterLane > are now 16 bits rather than 8 bits. > > Signed-off-by: Hal Rosenstock Thanks, applied. Ira > --- > diff --git a/src/fields.c b/src/fields.c > index 21b0fa6..06bc9f0 100644 > --- a/src/fields.c > +++ b/src/fields.c > @@ -540,44 +540,44 @@ static const ib_field_t ib_mad_f[] = { >*/ > {BITSOFFS(8, 8), "PortSelect", mad_dump_uint}, > {64, 64, "CounterSelect", mad_dump_hex}, > - {BITSOFFS(128, 8), "SyncHeaderErrorCounter", mad_dump_uint}, > - {BITSOFFS(136, 8), "UnknownBlockCounter", mad_dump_uint}, > - {BITSOFFS(144, 8), "ErrorDetectionCounterLane0", mad_dump_uint}, > - {BITSOFFS(152, 8), "ErrorDetectionCounterLane1", mad_dump_uint}, > - {BITSOFFS(160, 8), "ErrorDetectionCounterLane2", mad_dump_uint}, > - {BITSOFFS(168, 8), "ErrorDetectionCounterLane3", mad_dump_uint}, > - {BITSOFFS(176, 8), "ErrorDetectionCounterLane4", mad_dump_uint}, > - {BITSOFFS(184, 8), "ErrorDetectionCounterLane5", mad_dump_uint}, > - {BITSOFFS(192, 8), "ErrorDetectionCounterLane6", mad_dump_uint}, > - {BITSOFFS(200, 8), "ErrorDetectionCounterLane7", mad_dump_uint}, > - {BITSOFFS(208, 8), "ErrorDetectionCounterLane8", mad_dump_uint}, > - {BITSOFFS(216, 8), "ErrorDetectionCounterLane9", mad_dump_uint}, > - {BITSOFFS(224, 8), "ErrorDetectionCounterLane10", mad_dump_uint}, > - {BITSOFFS(232, 8), "ErrorDetectionCounterLane11", mad_dump_uint}, > - {256, 32, "FECCorrectableBlockCtrLane0", mad_dump_uint}, > - {288, 32, "FECCorrectableBlockCtrLane1", mad_dump_uint}, > - {320, 32, "FECCorrectableBlockCtrLane2", mad_dump_uint}, > - {352, 32, "FECCorrectableBlockCtrLane3", mad_dump_uint}, > - {384, 32, "FECCorrectableBlockCtrLane4", mad_dump_uint}, > - {416, 32, "FECCorrectableBlockCtrLane5", mad_dump_uint}, > - {448, 32, "FECCorrectableBlockCtrLane6", mad_dump_uint}, > - {480, 32, "FECCorrectableBlockCtrLane7", mad_dump_uint}, > - {512, 32, 
"FECCorrectableBlockCtrLane8", mad_dump_uint}, > - {544, 32, "FECCorrectableBlockCtrLane9", mad_dump_uint}, > - {580, 32, "FECCorrectableBlockCtrLane10", mad_dump_uint}, > - {608, 32, "FECCorrectableBlockCtrLane11", mad_dump_uint}, > - {640, 32, "FECUncorrectableBlockCtrLane0", mad_dump_uint}, > - {672, 32, "FECUncorrectableBlockCtrLane1", mad_dump_uint}, > - {704, 32, "FECUncorrectableBlockCtrLane2", mad_dump_uint}, > - {736, 32, "FECUncorrectableBlockCtrLane3", mad_dump_uint}, > - {768, 32, "FECUncorrectableBlockCtrLane4", mad_dump_uint}, > - {800, 32, "FECUncorrectableBlockCtrLane5", mad_dump_uint}, > - {832, 32, "FECUncorrectableBlockCtrLane6", mad_dump_uint}, > - {864, 32, "FECUncorrectableBlockCtrLane7", mad_dump_uint}, > - {896, 32, "FECUncorrectableBlockCtrLane8", mad_dump_uint}, > - {928, 32, "FECUncorrectableBlockCtrLane9", mad_dump_uint}, > - {960, 32, "FECUncorrectableBlockCtrLane10", mad_dump_uint}, > - {992, 32, "FECUncorrectableBlockCtrLane11", mad_dump_uint}, > + {BITSOFFS(128, 16), "SyncHeaderErrorCounter", mad_dump_uint}, > + {BITSOFFS(144, 16), "UnknownBlockCounter", mad_dump_uint}, > + {BITSOFFS(160, 16), "ErrorDetectionCounterLane0", mad_dump_uint}, > + {BITSOFFS(176, 16), "ErrorDetectionCounterLane1", mad_dump_uint}, > + {BITSOFFS(192, 16), "ErrorDetectionCounterLane2", mad_dump_uint}, > + {BITSOFFS(208, 16), "ErrorDetectionCounterLane3", mad_dump_uint}, > + {BITSOFFS(224, 16), "ErrorDetectionCounterLane4", mad_dump_uint}, > + {BITSOFFS(240, 16), "ErrorDetectionCounterLane5", mad_dump_uint}, > + {BITSOFFS(256, 16), "ErrorDetectionCounterLane6", mad_dump_uint}, > + {BITSOFFS(272, 16), "ErrorDetectionCounterLane7", mad_dump_uint}, > + {BITSOFFS(288, 16), "ErrorDetectionCounterLane8", mad_dump_uint}, > + {BITSOFFS(304, 16), "ErrorDetectionCounterLane9", mad_dump_uint}, > + {BITSOFFS(320, 16), "ErrorDetectionCounterLane10", mad_dump_uint}, > + {BITSOFFS(336, 16), "ErrorDetectionCounterLane11", mad_dump_uint}, > + {352, 32, 
"FECCorrectableBlockCtrLane0", mad_dump_uint}, > + {384, 32, "FECCorrectableBlockCtrLane1", mad_dump_uint}, > + {416, 32, "FECCorrectableBlockCtrLane2", mad_dump_uint}, > + {448, 32, "FECCorrectableBlockCtrLane3", mad_dump_uint}, > + {480, 32, "FECCorrectableBlockCtrLane4", mad_dump_uint}, > + {512, 32, "FECCorrectableBlockCtrLane5", mad_dump_uint}, > + {544, 32, "FECCorrectableBlockCtrLane6", mad_dump_uint}, > + {576, 32, "FECCorrectableBlockCtrLane7", mad_dump_uint}, > + {608, 32, "FECCorrectableBlockCtrLane8", mad_dump_uint}, > + {640, 32, "FECCorrectableBlockCtrLane9", mad_dump_uint}, > + {672, 32, "FECCorrectableBlockCtrLane10", mad_dump_uint}, > + {704, 32, "FECCorrectableBlockCtrLane11", mad_dump_uint}, > + {736, 32, "FECUn
[PATCH] iw_cxgb4: Drop peer_abort when no endpoint found.
Log a warning and drop the abort message. Otherwise we will do a bogus wake_up() and crash. Signed-off-by: Steve Wise Cc: sta...@vger.kernel.org --- drivers/infiniband/hw/cxgb4/cm.c |6 ++ 1 files changed, 6 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 8f43143..b75d407 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2656,6 +2656,12 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int tid = GET_TID(req); ep = lookup_tid(t, tid); + if (!ep) { + printk(KERN_WARNING MOD + "Abort on non-existent endpoint, tid %d\n", tid); + kfree_skb(skb); + return 0; + } if (is_neg_adv_abort(req->status)) { PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep, ep->hwtid); -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] iw_cxgb4: don't BUG_ON if we get an abort with no endpoint found.
On Mon, Apr 30, 2012 at 12:33 PM, Steve Wise wrote: > Hey Roland, this patch is incorrect. It erroneously changes abort_rpl() > instead of peer_abort_intr(). I will resubmit the correct patch asap. OK, consider it dropped. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] IB/uverbs: Lock SRQ / CQ / PD objects in a consistent order
From: Roland Dreier Since XRC support was added, the uverbs code has locked SRQ, CQ and PD objects needed during QP and SRQ creation in different orders depending on the code path. This leads to the (at least theoretical) possibility of deadlock, and triggers the lockdep splat below. Fix this by making sure we always lock the SRQ first, then CQs and finally the PD. == [ INFO: possible circular locking dependency detected ] 3.4.0-rc5+ #34 Not tainted --- ibv_srq_pingpon/2484 is trying to acquire lock: (SRQ-uobj){+.}, at: [] idr_read_uobj+0x2f/0x4d [ib_uverbs] but task is already holding lock: (CQ-uobj){+.}, at: [] idr_read_uobj+0x2f/0x4d [ib_uverbs] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (CQ-uobj){+.}: [] lock_acquire+0xbf/0xfe [] down_read+0x34/0x43 [] idr_read_uobj+0x2f/0x4d [ib_uverbs] [] idr_read_obj+0x9/0x19 [ib_uverbs] [] ib_uverbs_create_qp+0x180/0x684 [ib_uverbs] [] ib_uverbs_write+0xb7/0xc2 [ib_uverbs] [] vfs_write+0xa7/0xee [] sys_write+0x45/0x69 [] system_call_fastpath+0x16/0x1b -> #1 (PD-uobj){++}: [] lock_acquire+0xbf/0xfe [] down_read+0x34/0x43 [] idr_read_uobj+0x2f/0x4d [ib_uverbs] [] idr_read_obj+0x9/0x19 [ib_uverbs] [] __uverbs_create_xsrq+0x96/0x386 [ib_uverbs] [] ib_uverbs_detach_mcast+0x1cd/0x1e6 [ib_uverbs] [] ib_uverbs_write+0xb7/0xc2 [ib_uverbs] [] vfs_write+0xa7/0xee [] sys_write+0x45/0x69 [] system_call_fastpath+0x16/0x1b -> #0 (SRQ-uobj){+.}: [] __lock_acquire+0xa29/0xd06 [] lock_acquire+0xbf/0xfe [] down_read+0x34/0x43 [] idr_read_uobj+0x2f/0x4d [ib_uverbs] [] idr_read_obj+0x9/0x19 [ib_uverbs] [] ib_uverbs_create_qp+0x1e5/0x684 [ib_uverbs] [] ib_uverbs_write+0xb7/0xc2 [ib_uverbs] [] vfs_write+0xa7/0xee [] sys_write+0x45/0x69 [] system_call_fastpath+0x16/0x1b other info that might help us debug this: Chain exists of: SRQ-uobj --> PD-uobj --> CQ-uobj Possible unsafe locking scenario: CPU0CPU1 lock(CQ-uobj); lock(PD-uobj); lock(CQ-uobj); lock(SRQ-uobj); *** DEADLOCK *** 3 
locks held by ibv_srq_pingpon/2484: #0: (QP-uobj){+.+...}, at: [] ib_uverbs_create_qp+0xe9/0x684 [ib_uverbs] #1: (PD-uobj){++}, at: [] idr_read_uobj+0x2f/0x4d [ib_uverbs] #2: (CQ-uobj){+.}, at: [] idr_read_uobj+0x2f/0x4d [ib_uverbs] stack backtrace: Pid: 2484, comm: ibv_srq_pingpon Not tainted 3.4.0-rc5+ #34 Call Trace: [] print_circular_bug+0x1f8/0x209 [] __lock_acquire+0xa29/0xd06 [] ? __idr_get_uobj+0x20/0x5e [ib_uverbs] [] ? idr_read_uobj+0x2f/0x4d [ib_uverbs] [] lock_acquire+0xbf/0xfe [] ? idr_read_uobj+0x2f/0x4d [ib_uverbs] [] ? lock_release+0x166/0x189 [] down_read+0x34/0x43 [] ? idr_read_uobj+0x2f/0x4d [ib_uverbs] [] idr_read_uobj+0x2f/0x4d [ib_uverbs] [] idr_read_obj+0x9/0x19 [ib_uverbs] [] ib_uverbs_create_qp+0x1e5/0x684 [ib_uverbs] [] ? lock_acquire+0xdb/0xfe [] ? lock_release_non_nested+0x94/0x213 [] ? might_fault+0x40/0x90 [] ? might_fault+0x40/0x90 [] ib_uverbs_write+0xb7/0xc2 [ib_uverbs] [] vfs_write+0xa7/0xee [] ? fget_light+0x3b/0x99 [] sys_write+0x45/0x69 [] system_call_fastpath+0x16/0x1b Reported-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 66 ++ 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 85231e2..ad750f3 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1423,13 +1423,6 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, } device = xrcd->device; } else { - pd = idr_read_pd(cmd.pd_handle, file->ucontext); - scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); - if (!pd || !scq) { - ret = -EINVAL; - goto err_put; - } - if (cmd.qp_type == IB_QPT_XRC_INI) { cmd.max_recv_wr = cmd.max_recv_sge = 0; } else { @@ -1440,13 +1433,24 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, goto err_put; }
[PATCH 1/2] IB/uverbs: Make lockdep output more readable
From: Roland Dreier Add names for our lockdep classes, so instead of having to decipher lockdep output with mysterious names: Chain exists of: key#14 --> key#11 --> key#13 lockdep will give us something nicer: Chain exists of: SRQ-uobj --> PD-uobj --> CQ-uobj Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 39 +++--- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 4d27e4c..85231e2 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -41,13 +41,18 @@ #include "uverbs.h" -static struct lock_class_key pd_lock_key; -static struct lock_class_key mr_lock_key; -static struct lock_class_key cq_lock_key; -static struct lock_class_key qp_lock_key; -static struct lock_class_key ah_lock_key; -static struct lock_class_key srq_lock_key; -static struct lock_class_key xrcd_lock_key; +struct uverbs_lock_class { + struct lock_class_key key; + charname[16]; +}; + +static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" }; +static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" }; +static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" }; +static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" }; +static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; +static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; +static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ do {\ @@ -83,13 +88,13 @@ static struct lock_class_key xrcd_lock_key; */ static void init_uobj(struct ib_uobject *uobj, u64 user_handle, - struct ib_ucontext *context, struct lock_class_key *key) + struct ib_ucontext *context, struct uverbs_lock_class *c) { uobj->user_handle = user_handle; uobj->context = context; kref_init(&uobj->ref); init_rwsem(&uobj->mutex); - lockdep_set_class(&uobj->mutex, key); + 
lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name); uobj->live= 0; } @@ -522,7 +527,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - init_uobj(uobj, 0, file->ucontext, &pd_lock_key); + init_uobj(uobj, 0, file->ucontext, &pd_lock_class); down_write(&uobj->mutex); pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, @@ -750,7 +755,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, goto err_tree_mutex_unlock; } - init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_key); + init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class); down_write(&obj->uobject.mutex); @@ -947,7 +952,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - init_uobj(uobj, 0, file->ucontext, &mr_lock_key); + init_uobj(uobj, 0, file->ucontext, &mr_lock_class); down_write(&uobj->mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); @@ -1115,7 +1120,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_key); + init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_class); down_write(&obj->uobject.mutex); if (cmd.comp_channel >= 0) { @@ -1407,7 +1412,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); down_write(&obj->uevent.uobject.mutex); if (cmd.qp_type == IB_QPT_XRC_TGT) { @@ -1585,7 +1590,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); down_write(&obj->uevent.uobject.mutex); xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); @@ -2272,7 +2277,7 @@ ssize_t 
ib_uverbs_create_ah(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_key); + init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class); down_write(&uobj->mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); @@ -2476,7 +2481,7 @@ static int
Re: [PATCH 2/2] iw_cxgb4: don't BUG_ON if we get an abort with no endpoint found.
Hey Roland, this patch is incorrect. It erroneously changes abort_rpl() instead of peer_abort_intr(). I will resubmit the correct patch asap. On 04/27/2012 09:59 AM, Steve Wise wrote: Just log a warning. Signed-off-by: Steve Wise Cc: sta...@vger.kernel.org --- drivers/infiniband/hw/cxgb4/cm.c |5 - 1 files changed, 4 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 8f43143..3d0e40a 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1362,7 +1362,10 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) ep = lookup_tid(t, tid); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - BUG_ON(!ep); + if (!ep) { + printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n"); + return 0; + } mutex_lock(&ep->com.mutex); switch (ep->com.state) { case ABORTING: -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: ib_destroy_cm_id() versus cm callback race ?
> Are you sure that only one thread at a time will invoke a CM callback ? As > far as I can see cm_recv_handler() queues work without checking whether > any other work is ongoing. From drivers/infiniband/core/cm.c: All callbacks for a single ID should be serialized. (I think the listen ID is an exception, which would allow reporting multiple connection requests.) cm_work_handler dispatches work to specific handlers based on the type of CM message. The serialization doesn't occur until we know which ID we're dealing with and its state. See the cm_*_handler(work) calls. > What I have noticed could be explained by the following sequence of events: > * IB CM core receives a connection request and invokes the callback for event > IB_CM_REQ_RECEIVED. The flow should be: cm_work_handler() -> cm_req_handler() -> cm_match_req(), which increments work_count (to 1 in this case). > * That callback adds connection information to a global list (and keeps > running). Do you mean that the callback doesn't return to the CM? > * User requests shutdown and hence from another thread ib_send_cm_dreq() is > invoked. ib_send_cm_dreq() at this point should fail with EINVAL, as the connection state is not yet established. So, I don't think I'm quite following the operation yet. > * IB CM core receives a DREP message and invokes the callback for event > IB_CM_DREP_RECEIVED. That callback function gets confused because of the > concurrent connection state manipulations by the IB_CM_REQ_RECEIVED handler > (which is still running). The cm_drep_handler() code increments work_count and checks the result. If work_count was non-zero, then the DREP event should be queued on the id's work_list. The thread which set work_count to 1, will eventually invoke the callback for that event. - Sean
Re: ib_destroy_cm_id() versus cm callback race ?
On 04/30/12 18:29, Hefty, Sean wrote: >> That makes me wonder how it is prevented that two CM callbacks for the >> same CM ID run concurrently on different CPUs ? > > The callback code ends up looking like this: > > ret = atomic_inc_and_test(&cm_id_priv->work_count); > if (!ret) > list_add_tail(&work->list, &cm_id_priv->work_list); > spin_unlock_irq(&cm_id_priv->lock); > > if (ret) > cm_process_work(cm_id_priv, work); > > Only 1 thread will end up invoking callbacks to the user. Other events > end up being queued on the work_list for a given id. Are you sure that only one thread at a time will invoke a CM callback ? As far as I can see cm_recv_handler() queues work without checking whether any other work is ongoing. From drivers/infiniband/core/cm.c: static void cm_recv_handler(...) { [ ... ] work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths, GFP_KERNEL); if (!work) { ib_free_recv_mad(mad_recv_wc); return; } INIT_DELAYED_WORK(&work->work, cm_work_handler); work->cm_event.event = event; work->mad_recv_wc = mad_recv_wc; work->port = port; queue_delayed_work(cm.wq, &work->work, 0); } What I have noticed could be explained by the following sequence of events: * IB CM core receives a connection request and invokes the callback for event IB_CM_REQ_RECEIVED. * That callback adds connection information to a global list (and keeps running). * User requests shutdown and hence from another thread ib_send_cm_dreq() is invoked. * IB CM core receives a DREP message and invokes the callback for event IB_CM_DREP_RECEIVED. That callback function gets confused because of the concurrent connection state manipulations by the IB_CM_REQ_RECEIVED handler (which is still running). Bart. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: ib_destroy_cm_id() versus cm callback race ?
> That makes me wonder how it is prevented that two CM callbacks for the > same CM ID run concurrently on different CPUs ? The callback code ends up looking like this: ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); Only 1 thread will end up invoking callbacks to the user. Other events end up being queued on the work_list for a given id. - Sean -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] libibmad/fields.c: Update counter offsets for PortExtendedSpeedsCounters
Per MgtWG RefID #4841 SyncHeaderErrorCounter, UnknownBlockCounter, and ErrorDetectionCounterLane are now 16 bits rather than 8 bits. Signed-off-by: Hal Rosenstock --- diff --git a/src/fields.c b/src/fields.c index 21b0fa6..06bc9f0 100644 --- a/src/fields.c +++ b/src/fields.c @@ -540,44 +540,44 @@ static const ib_field_t ib_mad_f[] = { */ {BITSOFFS(8, 8), "PortSelect", mad_dump_uint}, {64, 64, "CounterSelect", mad_dump_hex}, - {BITSOFFS(128, 8), "SyncHeaderErrorCounter", mad_dump_uint}, - {BITSOFFS(136, 8), "UnknownBlockCounter", mad_dump_uint}, - {BITSOFFS(144, 8), "ErrorDetectionCounterLane0", mad_dump_uint}, - {BITSOFFS(152, 8), "ErrorDetectionCounterLane1", mad_dump_uint}, - {BITSOFFS(160, 8), "ErrorDetectionCounterLane2", mad_dump_uint}, - {BITSOFFS(168, 8), "ErrorDetectionCounterLane3", mad_dump_uint}, - {BITSOFFS(176, 8), "ErrorDetectionCounterLane4", mad_dump_uint}, - {BITSOFFS(184, 8), "ErrorDetectionCounterLane5", mad_dump_uint}, - {BITSOFFS(192, 8), "ErrorDetectionCounterLane6", mad_dump_uint}, - {BITSOFFS(200, 8), "ErrorDetectionCounterLane7", mad_dump_uint}, - {BITSOFFS(208, 8), "ErrorDetectionCounterLane8", mad_dump_uint}, - {BITSOFFS(216, 8), "ErrorDetectionCounterLane9", mad_dump_uint}, - {BITSOFFS(224, 8), "ErrorDetectionCounterLane10", mad_dump_uint}, - {BITSOFFS(232, 8), "ErrorDetectionCounterLane11", mad_dump_uint}, - {256, 32, "FECCorrectableBlockCtrLane0", mad_dump_uint}, - {288, 32, "FECCorrectableBlockCtrLane1", mad_dump_uint}, - {320, 32, "FECCorrectableBlockCtrLane2", mad_dump_uint}, - {352, 32, "FECCorrectableBlockCtrLane3", mad_dump_uint}, - {384, 32, "FECCorrectableBlockCtrLane4", mad_dump_uint}, - {416, 32, "FECCorrectableBlockCtrLane5", mad_dump_uint}, - {448, 32, "FECCorrectableBlockCtrLane6", mad_dump_uint}, - {480, 32, "FECCorrectableBlockCtrLane7", mad_dump_uint}, - {512, 32, "FECCorrectableBlockCtrLane8", mad_dump_uint}, - {544, 32, "FECCorrectableBlockCtrLane9", mad_dump_uint}, - {580, 32, "FECCorrectableBlockCtrLane10", 
mad_dump_uint}, - {608, 32, "FECCorrectableBlockCtrLane11", mad_dump_uint}, - {640, 32, "FECUncorrectableBlockCtrLane0", mad_dump_uint}, - {672, 32, "FECUncorrectableBlockCtrLane1", mad_dump_uint}, - {704, 32, "FECUncorrectableBlockCtrLane2", mad_dump_uint}, - {736, 32, "FECUncorrectableBlockCtrLane3", mad_dump_uint}, - {768, 32, "FECUncorrectableBlockCtrLane4", mad_dump_uint}, - {800, 32, "FECUncorrectableBlockCtrLane5", mad_dump_uint}, - {832, 32, "FECUncorrectableBlockCtrLane6", mad_dump_uint}, - {864, 32, "FECUncorrectableBlockCtrLane7", mad_dump_uint}, - {896, 32, "FECUncorrectableBlockCtrLane8", mad_dump_uint}, - {928, 32, "FECUncorrectableBlockCtrLane9", mad_dump_uint}, - {960, 32, "FECUncorrectableBlockCtrLane10", mad_dump_uint}, - {992, 32, "FECUncorrectableBlockCtrLane11", mad_dump_uint}, + {BITSOFFS(128, 16), "SyncHeaderErrorCounter", mad_dump_uint}, + {BITSOFFS(144, 16), "UnknownBlockCounter", mad_dump_uint}, + {BITSOFFS(160, 16), "ErrorDetectionCounterLane0", mad_dump_uint}, + {BITSOFFS(176, 16), "ErrorDetectionCounterLane1", mad_dump_uint}, + {BITSOFFS(192, 16), "ErrorDetectionCounterLane2", mad_dump_uint}, + {BITSOFFS(208, 16), "ErrorDetectionCounterLane3", mad_dump_uint}, + {BITSOFFS(224, 16), "ErrorDetectionCounterLane4", mad_dump_uint}, + {BITSOFFS(240, 16), "ErrorDetectionCounterLane5", mad_dump_uint}, + {BITSOFFS(256, 16), "ErrorDetectionCounterLane6", mad_dump_uint}, + {BITSOFFS(272, 16), "ErrorDetectionCounterLane7", mad_dump_uint}, + {BITSOFFS(288, 16), "ErrorDetectionCounterLane8", mad_dump_uint}, + {BITSOFFS(304, 16), "ErrorDetectionCounterLane9", mad_dump_uint}, + {BITSOFFS(320, 16), "ErrorDetectionCounterLane10", mad_dump_uint}, + {BITSOFFS(336, 16), "ErrorDetectionCounterLane11", mad_dump_uint}, + {352, 32, "FECCorrectableBlockCtrLane0", mad_dump_uint}, + {384, 32, "FECCorrectableBlockCtrLane1", mad_dump_uint}, + {416, 32, "FECCorrectableBlockCtrLane2", mad_dump_uint}, + {448, 32, "FECCorrectableBlockCtrLane3", mad_dump_uint}, + {480, 
32, "FECCorrectableBlockCtrLane4", mad_dump_uint}, + {512, 32, "FECCorrectableBlockCtrLane5", mad_dump_uint}, + {544, 32, "FECCorrectableBlockCtrLane6", mad_dump_uint}, + {576, 32, "FECCorrectableBlockCtrLane7", mad_dump_uint}, + {608, 32, "FECCorrectableBlockCtrLane8", mad_dump_uint}, + {640, 32, "FECCorrectableBlockCtrLane9", mad_dump_uint}, + {672, 32, "FECCorrectableBlockCtrLane10", mad_dump_uint}, + {704, 32, "FECCorrectableBlockCtrLane11", mad_dump_uint}, + {736, 32, "FECUncorrectableBlockCtrLane0", mad_dump_uint}, + {768, 32, "FECUncorrectableBlockCtrLane1", mad_dump_uint}
fmr pool and remap doubt
Hi, I am trying to understand the remapping functionality and fmr_pool.c. Looking back at an old thread: http://lists.openfabrics.org/pipermail/general/2006-February/017198.html Can you please confirm whether my understanding is correct or not? 1. max_map_per_fmr indicates that different memory pages can be remapped (again) without invoking unmap_fmr(). Basically, the previous mapping is overwritten with the new mapping on every map_phys_fmr() call, without unmapping first. 2. The adapter indicates the above limit - how many times an old mapping can be overwritten (remapped) before invoking unmap_fmr(). 3. Remapping allows faster operation compared to a map(), unmap() sequence, which is why unmap_fmr() is mostly done in worker threads in consumers such as SDP, RDS, etc. Is the above understanding correct? Regards, Parav Pandit -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 01/15] ib_srp: Enlarge block layer timeout
On Sun, 2012-04-22 at 16:01 +0000, Bart Van Assche wrote: > On 03/29/12 16:59, Dave Dillow wrote: > > I haven't chewed on the rest yet, but would like to see this one at > > least in 3.4 if possible. > If you have further comments about any of the patches in this series, > these are welcome. The 3.5 merge window isn't that far away anymore. I will attend to this before the end of the week, sooner if possible. -- Dave Dillow National Center for Computational Science Oak Ridge National Laboratory (865) 241-6602 office -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: Mellanox/RoCE
Hi, thanks for your comments. We use a "BNT Virtual Fabric 10 Gb" switch, which is DCB capable. I assume no link-level flow control mechanism (global pause/PFC) is enabled yet. I'll try that next and post the results. Klaus-Dieter Wacker IBM From: Boris Shpolyansky To: Hal Rosenstock, Klaus Wacker/Germany/IBM@IBMDE Cc: "linux-rdma@vger.kernel.org", Thomas Husemann, Nir Gal Date: 27/04/2012 18:26 Subject: RE: Mellanox/RoCE Klaus, You may be experiencing frame drops on your Ethernet fabric. Is flow control (pause frames) enabled? RDMA traffic requires a lossless layer-2 network; it is not designed to handle situations where multiple frames are re-transmitted due to packets being dropped. Boris Shpolyansky Director of Field Application Engineering, North America Mellanox Technologies Inc. 350 Oakmead Parkway, Suite 100 Sunnyvale, CA 94085 Tel.: (408) 916 0014 Fax: (408) 585 0314 Cell: (408) 834 9365 www.mellanox.com Mellanox on Twitter and Facebook -Original Message- From: linux-rdma-ow...@vger.kernel.org [ mailto:linux-rdma-ow...@vger.kernel.org] On Behalf Of Hal Rosenstock Sent: Friday, April 27, 2012 5:25 AM To: Klaus Wacker Cc: linux-rdma@vger.kernel.org Subject: Re: Mellanox/RoCE Hi Klaus, On 4/27/2012 8:07 AM, Klaus Wacker wrote: > > Hi, > i want to setup Mellanox/RoCE. My system is SUSE SLES11.2 with > Mellanox-OFED-1.5.3 > I have ping on the ethernet interface working and also ibv_ud_pingpong. 
> ibv_rc_pingpong fails: > bc2x03:~ # ibv_rc_pingpong -g 0 -s 128 -d mlx4_0 -i 2 10.100.10.24 > local address: LID 0x, QPN 0x600048, PSN 0x5e836d, GID > fe80::202:c9ff:fe4c:5aa3 > remote address: LID 0x, QPN 0x0c0048, PSN 0x2ced8f, GID > fe80::202:c9ff:fe4c:5aeb Failed status transport retry counter > exceeded (12) for wr_id 2 > > The ibstat info is: > bc2x03:~ # ibstat > CA 'mlx4_0' > CA type: MT26448 > Number of ports: 2 > Firmware version: 2.9.1100 > Hardware version: b0 > Node GUID: 0x0002c903004c5aa2 > System image GUID: 0x0002c903004c5aa5 > Port 1: > State: Active > Physical state: LinkUp > Rate: 10 > Base lid: 0 > LMC: 0 > SM lid: 0 > Capability mask: 0x0001 > Port GUID: 0x0202c9fffe4c5aa2 > Link layer: Ethernet > Port 2: > State: Active > Physical state: LinkUp > Rate: 10 > Base lid: 0 > LMC: 0 > SM lid: 0 > Capability mask: 0x0001 > Port GUID: 0x0202c9fffe4c5aa3 > Link layer: Ethernet > > The Capability mask shows weak settings, gives this an indication for >