Re: SDP zcopy performance measurement
Output when running LD_PRELOAD=/usr/lib64/libsdp.so netperf -L 172.31.134.10 -H 172.31.134.1 -c -C -- -m 1048576 : netperf: data send error: Connection reset by peer len was -1 while LD_PRELOAD=/usr/lib64/libsdp.so netperf -L 172.31.134.10 -H 172.31.134.1 -c -C -- -m 65436 works fine. System Configuration: RHEL 5.5 OFED 1.5.2 netperf 2.4.5 Please help. On Mon, Apr 25, 2011 at 5:14 PM, bhavinkumar thumar bhawinkumar.thu...@gmail.com wrote: I have tried the following command: LD_PRELOAD=libsdp.so netperf -L 172.31.133.1 -H 172.31.132.1 -c -C -- -m 65536. It works, but when I increase the message size (-m 655360), it doesn't work! So I don't think that the problem is due to the absence of the full path for libsdp.so -Bhavin On Mon, Apr 25, 2011 at 5:05 PM, Bart Van Assche bvanass...@acm.org wrote: On Mon, Apr 25, 2011 at 1:24 PM, bhavinkumar thumar bhawinkumar.thu...@gmail.com wrote: I am having an issue with SDP performance measurement. I tried Netperf with the following command: LD_PRELOAD=libsdp.so netperf -L 172.31.133.1 -H 172.31.132.1 -c -C -- -m 655360 This doesn't work. sdp_zcopy_thresh is set to 65536. Have you already tried to specify the full path for libsdp.so in LD_PRELOAD=... ? Bart. -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Problem with ib_write_bw
Hi, #ib_write_bw -m 4096 -s 1048576 -- RDMA_Write BW Test Number of qp's running 1 Connection type : RC Each Qp will post up to 100 messages each time Inline data is used up to 0 bytes message Link type is IB Requested mtu is higher than active mtu Changing to active mtu Mtu : 2048 local address: LID 0x0e QPN 0x1f0405 PSN 0xcb8369 RKey 0x56002700 VAddr 0x002b47c7575000 Couldn't parse line 000d:230405:9017a6:fa042e00:2b61c9213000:80:54 Unable to Read from the socket Failed to exchange date between server and clients what's wrong? Thanks Bhavin -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] opensm/osm_pkey_mgr.c: Pack switch peer port PKey tables
Hi Hal, On 15:39 Mon 11 Apr , Hal Rosenstock wrote: In pkey_mgr_update_peer_port, rather than mirror the end port PKey table, pack the peer switch's port PKey table to eliminate any holes. Why don't you want to pack port's pkey table itself and mirror it packed to the peer's port? Signed-off-by: Hal Rosenstock h...@mellanox.com --- diff --git a/opensm/osm_pkey_mgr.c b/opensm/osm_pkey_mgr.c index f6bc9d1..06d9b1e 100644 --- a/opensm/osm_pkey_mgr.c +++ b/opensm/osm_pkey_mgr.c @@ -398,6 +398,35 @@ static uint16_t last_used_pkey_index(const osm_port_t * const p_port, return index; } +static int update_peer_block(osm_log_t * p_log, osm_sm_t * sm, + osm_physp_t * peer, + osm_pkey_tbl_t * p_peer_pkey_tbl, + ib_pkey_table_t * new_peer_block, + uint16_t peer_block_idx, osm_node_t * p_node) +{ + int ret = 0; + ib_pkey_table_t *peer_block; + + peer_block = osm_pkey_tbl_block_get(p_peer_pkey_tbl, peer_block_idx); + if (!peer_block || + memcmp(peer_block, new_peer_block, sizeof(*peer_block))) { + if (pkey_mgr_update_pkey_entry(sm, peer, new_peer_block, +peer_block_idx) != IB_SUCCESS) { + OSM_LOG(p_log, OSM_LOG_ERROR, ERR 0509: + pkey_mgr_update_pkey_entry() failed to update + pkey table block %d for node 0x%016 + PRIx64 port %u (%s)\n, + peer_block_idx, + cl_ntoh64(osm_node_get_node_guid(p_node)), + osm_physp_get_port_num(peer), + p_node-print_desc); + ret = -1; + } + } + + return ret; +} + static int pkey_mgr_update_peer_port(osm_log_t * p_log, osm_sm_t * sm, const osm_subn_t * p_subn, const osm_port_t * const p_port, @@ -405,15 +434,16 @@ static int pkey_mgr_update_peer_port(osm_log_t * p_log, osm_sm_t * sm, { osm_physp_t *p_physp, *peer; osm_node_t *p_node; - ib_pkey_table_t *block, *peer_block; + ib_pkey_table_t *block; const osm_pkey_tbl_t *p_pkey_tbl; osm_pkey_tbl_t *p_peer_pkey_tbl; - uint16_t block_index; + uint16_t block_index, peer_block_idx; uint16_t peer_max_blocks; uint16_t last_index; - ib_api_status_t status = IB_SUCCESS; - ib_pkey_table_t empty_block; - int ret 
= 0; + ib_pkey_table_t new_peer_block; + uint16_t pkey_idx, peer_pkey_idx; + ib_net16_t pkey; + int ret = 0, loop_exit = 0; p_physp = p_port-p_physp; if (!p_physp) @@ -425,70 +455,80 @@ static int pkey_mgr_update_peer_port(osm_log_t * p_log, osm_sm_t * sm, if (!p_node-sw || !p_node-sw-switch_info.enforce_cap) return 0; - p_pkey_tbl = osm_physp_get_pkey_tbl(p_physp); - peer_max_blocks = pkey_mgr_get_physp_max_blocks(peer); - if (peer_max_blocks p_pkey_tbl-used_blocks) { - OSM_LOG(p_log, OSM_LOG_ERROR, ERR 0508: - Not enough pkey blocks (%u %u used) on switch 0x%016 - PRIx64 port %u (%s). Clearing Enforcement bit\n, - peer_max_blocks, p_pkey_tbl-used_blocks, - cl_ntoh64(osm_node_get_node_guid(p_node)), - osm_physp_get_port_num(peer), - p_node-print_desc); - enforce = FALSE; - ret = -1; - } else if (peer_max_blocks == p_pkey_tbl-used_blocks) { - /* Is last used pkey index beyond switch peer port capacity ? */ - last_index = (peer_max_blocks - 1) * IB_NUM_PKEY_ELEMENTS_IN_BLOCK + - last_used_pkey_index(p_port, p_pkey_tbl); - if (cl_ntoh16(p_node-sw-switch_info.enforce_cap) = last_index) { - OSM_LOG(p_log, OSM_LOG_ERROR, ERR 0507: - Not enough pkey entries (%u = %u) on switch 0x%016 - PRIx64 port %u (%s). Clearing Enforcement bit\n, - cl_ntoh16(p_node-sw-switch_info.enforce_cap), - last_index, - cl_ntoh64(osm_node_get_node_guid(p_node)), - osm_physp_get_port_num(peer), - p_node-print_desc); - enforce = FALSE; - ret = -1; - } + if (enforce == FALSE) { + pkey_mgr_enforce_partition(p_log, sm, peer, FALSE); + return -1; Though it doesn't affect anything, why do you return -1 here? } - if (pkey_mgr_enforce_partition(p_log, sm, peer, enforce)) - ret =
Re: [PATCH 1/7] libibumad: provide MAD definitions with libibumad
Hi Sean, I want to make sure I didn't miss any libibumad patches. There should be 8 patches: [PATCH 1/7] libibumad: provide MAD definitions with libibumad http://www.spinics.net/lists/linux-rdma/msg07997.html [PATCH 2/7] libibumad: add SA MAD definitions to umad http://www.spinics.net/lists/linux-rdma/msg07998.html [PATCH 3/7] libibumad: Add basic SM definitions to umad http://www.spinics.net/lists/linux-rdma/msg07999.html [PATCH 4/7] libibumad: Add CM definitions to umad http://www.spinics.net/lists/linux-rdma/msg08000.html RE: [PATCH 1/7] libibumad: Add new umad header files to release http://www.spinics.net/lists/linux-rdma/msg08001.html [PATCH 6/7] libibumad: Define ntohll/htonll http://www.spinics.net/lists/linux-rdma/msg08002.html [PATCH 7/7] libibumad: Define data type to indicate values are in big-endian http://www.spinics.net/lists/linux-rdma/msg08003.html RE: [PATCH 5/7] libibumad: Add new umad header files to release http://www.spinics.net/lists/linux-rdma/msg08004.html Correct me if I'm wrong. -- Alex -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2] OpenSM torus routing order list
Enables to define list of switch ports so the SM will go over this list when creating a routing. It helps balancing links load on some communication patterns where multipile links connect between the switches. Signed-off-by: David McMillen da...@systemfabricworks.com Signed-off-by: Alex Netes ale...@mellanox.com --- Changes since v1: Added detailed description in man/torus-2QoS.conf.5.in Added error messages on configuration file parsing Fixed some styling --- man/torus-2QoS.conf.5.in | 26 ++- opensm/osm_torus.c | 76 -- 2 files changed, 97 insertions(+), 5 deletions(-) diff --git a/man/torus-2QoS.conf.5.in b/man/torus-2QoS.conf.5.in index 147a7b1..df38ad3 100644 --- a/man/torus-2QoS.conf.5.in +++ b/man/torus-2QoS.conf.5.in @@ -62,7 +62,7 @@ see \fBUNICAST ROUTING\fR in torus-2QoS(8). \fIsw0_GUID sw1_GUID \fR .RS -These keywords are used to seed the torus/mesh topolgy. +These keywords are used to seed the torus/mesh topology. For example, xp_link 0x2000 0x2001 specifies that a link from the switch with node GUID 0x2000 to the switch with node GUID 0x2001 would point in the positive x direction, @@ -78,7 +78,7 @@ for torus dimensions of radix four (see \fBTOPOLOGY DISCOVERY\fR in torus-2QoS(8)). For such cases both the positive and negative coordinate directions must be specified. .P -Based on the topology specifed via the \fBtorus\fR/\fBmesh\fR keyword, +Based on the topology specified via the \fBtorus\fR/\fBmesh\fR keyword, torus-2QoS will detect and log when it has insufficient seed configuration. .RE . @@ -140,6 +140,24 @@ parameter needs to be increased. If this keyword appears multiple times, the last instance prevails. .RE . +.P +\fBport_order +\fIp1 p2 p3 ... +\fR +.RS +This keyword specifies the order in which CA ports on a destination switch +are visited when computing routes. 
When the fabric contains switches connected +with multiple parallel links, routes are distributed in a round-robin fashion +across such links, and so changing the order that CA ports are visited changes +the distribution of routes across such links. This may be advantageous for +some specific traffic patterns. +.P +The default is to visit CA ports in increasing port +order on destination switches. +.P +Duplicate values in the list will be ignored. +.RE +. .SH EXAMPLE . \f(RC @@ -171,6 +189,10 @@ z_dateline -1 # back to its original position. # on a host attached to a switch from the second seed. # Both instances should use this torus-2QoS.conf to ensure # path SL values do not change in the event of SM failover. + +# port_order defines the order on which the ports would be +# chosen for routing. +port_order 7 10 8 11 9 12 25 28 26 29 27 30 .fi \fR . diff --git a/opensm/osm_torus.c b/opensm/osm_torus.c index add3cf9..cd3d490 100644 --- a/opensm/osm_torus.c +++ b/opensm/osm_torus.c @@ -59,6 +59,8 @@ #define PORTGRP_MAX_PORTS16 #define SWITCH_MAX_PORTGRPS (1 + 2 * TORUS_MAX_DIM) +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + typedef ib_net64_t guid_t; /* @@ -287,6 +289,8 @@ struct torus { unsigned seed_cnt, seed_idx; unsigned x_sz, y_sz, z_sz; + unsigned port_order[IB_NODE_NUM_PORTS_MAX+1]; + unsigned sw_pool_sz; unsigned link_pool_sz; unsigned seed_sz; @@ -844,6 +848,56 @@ out: } static +bool parse_port(unsigned *pnum, const char *parse_sep) +{ + char *val, *nextchar; + + val = strtok(NULL, parse_sep); + if (!val) + return false; + *pnum = strtoul(val, nextchar, 0); + if (*pnum IB_NODE_NUM_PORTS_MAX) { + *pnum = 0; + } + return true; +} + +static +bool parse_port_order(struct torus *t, const char *parse_sep) +{ + unsigned i, j, k, n; + + for (i = 0; i ARRAY_SIZE(t-port_order); i++) { + if (!parse_port((t-port_order[i]), parse_sep)) { + OSM_LOG(t-osm-log, OSM_LOG_ERROR, + Error: cannot parse port_order); + break; + } + for (j = 0; j i; j++) { + if 
(t-port_order[j] == t-port_order[i]) { + OSM_LOG(t-osm-log, OSM_LOG_ERROR, + Error: ignoring duplicate port %u in +port_order parsing\n, + t-port_order[j]); + i--;/* Ignore duplicate port number */ + break; + } + } + } + + n = i; + for (j = 0; j ARRAY_SIZE(t-port_order); j++) { + for (k = 0; k i; k++) + if (t-port_order[k] == j) + break; + if (k = i) + t-port_order[n++] = j; + } + + return true; +} + +static bool
RE: [PATCH 1/7] libibumad: provide MAD definitions with libibumad
I want to make sure I didn't miss any libibumad patches. There should be 8 patches: There are 7 [PATCH 1/7] libibumad: provide MAD definitions with libibumad http://www.spinics.net/lists/linux-rdma/msg07997.html [PATCH 2/7] libibumad: add SA MAD definitions to umad http://www.spinics.net/lists/linux-rdma/msg07998.html [PATCH 3/7] libibumad: Add basic SM definitions to umad http://www.spinics.net/lists/linux-rdma/msg07999.html [PATCH 4/7] libibumad: Add CM definitions to umad http://www.spinics.net/lists/linux-rdma/msg08000.html RE: [PATCH 1/7] libibumad: Add new umad header files to release http://www.spinics.net/lists/linux-rdma/msg08001.html This should have been 5/7. It is the same as: [PATCH 6/7] libibumad: Define ntohll/htonll http://www.spinics.net/lists/linux-rdma/msg08002.html [PATCH 7/7] libibumad: Define data type to indicate values are in big-endian http://www.spinics.net/lists/linux-rdma/msg08003.html RE: [PATCH 5/7] libibumad: Add new umad header files to release http://www.spinics.net/lists/linux-rdma/msg08004.html this one, with the correct numbering. - Sean -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] infiniband-diags/vendstat: Fixed general info query
On Sat, 30 Apr 2011 23:28:57 -0700 Eli Dorfman el...@dev.mellanox.co.il wrote: On 04/28/2011 05:51 PM, Hal Rosenstock wrote: On 4/28/2011 5:09 AM, Eli Dorfman wrote: Fixed general info query There was no output when query succeeded Do you mean failed ? No I meant succeeded. Since the IBERROR was missing the print of GeneralInfo is skipped. If it fails then the if (general_info) code is reached but it will print garbage. Thanks applied, Ira Eli -- Hal Signed-off-by: Eli Dorfman el...@mellanox.com --- src/vendstat.c |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/src/vendstat.c b/src/vendstat.c index 92a90c8..8dbd1ee 100644 --- a/src/vendstat.c +++ b/src/vendstat.c @@ -375,6 +375,7 @@ int main(int argc, char **argv) gi = (is3_general_info_t *) buf; if (do_vendor(portid, srcport, IB_MLX_VENDOR_CLASS, IB_MAD_METHOD_GET, IB_MLX_IS3_GENERAL_INFO, 0, gi)) + IBERROR(generalinfo query); if (general_info) { /* dump IS3 or IS4 general info here */ -- Ira Weiny Math Programmer/Computer Scientist Lawrence Livermore National Lab 925-423-8008 wei...@llnl.gov -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv2] OpenSM torus routing order list
Alex Netes wrote: Enables to define list of switch ports so the SM will go over this list when creating a routing. It helps balancing links load on some communication patterns where multiple links connect between the switches. Signed-off-by: David McMillen da...@systemfabricworks.com Signed-off-by: Alex Netes ale...@mellanox.com Acked-by: Jim Schutt jasc...@sandia.gov --- Changes since v1: Added detailed description in man/torus-2QoS.conf.5.in Added error messages on configuration file parsing Fixed some styling --- man/torus-2QoS.conf.5.in | 26 ++- opensm/osm_torus.c | 76 -- 2 files changed, 97 insertions(+), 5 deletions(-) -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] opensm/osm_pkey_mgr.c: Pack switch peer port PKey tables
Hi Alex, On 5/2/2011 8:12 AM, Alex Netes wrote: Hi Hal, On 15:39 Mon 11 Apr , Hal Rosenstock wrote: In pkey_mgr_update_peer_port, rather than mirror the end port PKey table, pack the peer switch's port PKey table to eliminate any holes. Why don't you want to pack port's pkey table itself and mirror it packed to the peer's port? It's disruptive to change end port pkey indices whereas that's not the case on switch external ports and it has benefit there. Signed-off-by: Hal Rosenstock h...@mellanox.com --- diff --git a/opensm/osm_pkey_mgr.c b/opensm/osm_pkey_mgr.c index f6bc9d1..06d9b1e 100644 --- a/opensm/osm_pkey_mgr.c +++ b/opensm/osm_pkey_mgr.c @@ -398,6 +398,35 @@ static uint16_t last_used_pkey_index(const osm_port_t * const p_port, return index; } +static int update_peer_block(osm_log_t * p_log, osm_sm_t * sm, + osm_physp_t * peer, + osm_pkey_tbl_t * p_peer_pkey_tbl, + ib_pkey_table_t * new_peer_block, + uint16_t peer_block_idx, osm_node_t * p_node) +{ +int ret = 0; +ib_pkey_table_t *peer_block; + +peer_block = osm_pkey_tbl_block_get(p_peer_pkey_tbl, peer_block_idx); +if (!peer_block || +memcmp(peer_block, new_peer_block, sizeof(*peer_block))) { +if (pkey_mgr_update_pkey_entry(sm, peer, new_peer_block, + peer_block_idx) != IB_SUCCESS) { +OSM_LOG(p_log, OSM_LOG_ERROR, ERR 0509: +pkey_mgr_update_pkey_entry() failed to update +pkey table block %d for node 0x%016 +PRIx64 port %u (%s)\n, +peer_block_idx, +cl_ntoh64(osm_node_get_node_guid(p_node)), +osm_physp_get_port_num(peer), +p_node-print_desc); +ret = -1; +} +} + +return ret; +} + static int pkey_mgr_update_peer_port(osm_log_t * p_log, osm_sm_t * sm, const osm_subn_t * p_subn, const osm_port_t * const p_port, @@ -405,15 +434,16 @@ static int pkey_mgr_update_peer_port(osm_log_t * p_log, osm_sm_t * sm, { osm_physp_t *p_physp, *peer; osm_node_t *p_node; -ib_pkey_table_t *block, *peer_block; +ib_pkey_table_t *block; const osm_pkey_tbl_t *p_pkey_tbl; osm_pkey_tbl_t *p_peer_pkey_tbl; -uint16_t block_index; +uint16_t 
block_index, peer_block_idx; uint16_t peer_max_blocks; uint16_t last_index; -ib_api_status_t status = IB_SUCCESS; -ib_pkey_table_t empty_block; -int ret = 0; +ib_pkey_table_t new_peer_block; +uint16_t pkey_idx, peer_pkey_idx; +ib_net16_t pkey; +int ret = 0, loop_exit = 0; p_physp = p_port-p_physp; if (!p_physp) @@ -425,70 +455,80 @@ static int pkey_mgr_update_peer_port(osm_log_t * p_log, osm_sm_t * sm, if (!p_node-sw || !p_node-sw-switch_info.enforce_cap) return 0; -p_pkey_tbl = osm_physp_get_pkey_tbl(p_physp); -peer_max_blocks = pkey_mgr_get_physp_max_blocks(peer); -if (peer_max_blocks p_pkey_tbl-used_blocks) { -OSM_LOG(p_log, OSM_LOG_ERROR, ERR 0508: -Not enough pkey blocks (%u %u used) on switch 0x%016 -PRIx64 port %u (%s). Clearing Enforcement bit\n, -peer_max_blocks, p_pkey_tbl-used_blocks, -cl_ntoh64(osm_node_get_node_guid(p_node)), -osm_physp_get_port_num(peer), -p_node-print_desc); -enforce = FALSE; -ret = -1; -} else if (peer_max_blocks == p_pkey_tbl-used_blocks) { -/* Is last used pkey index beyond switch peer port capacity ? */ -last_index = (peer_max_blocks - 1) * IB_NUM_PKEY_ELEMENTS_IN_BLOCK + - last_used_pkey_index(p_port, p_pkey_tbl); -if (cl_ntoh16(p_node-sw-switch_info.enforce_cap) = last_index) { -OSM_LOG(p_log, OSM_LOG_ERROR, ERR 0507: -Not enough pkey entries (%u = %u) on switch 0x%016 -PRIx64 port %u (%s). Clearing Enforcement bit\n, -cl_ntoh16(p_node-sw-switch_info.enforce_cap), -last_index, -cl_ntoh64(osm_node_get_node_guid(p_node)), -osm_physp_get_port_num(peer), -p_node-print_desc); -enforce = FALSE; -ret = -1; -} +if (enforce == FALSE) { +pkey_mgr_enforce_partition(p_log, sm, peer, FALSE); +return -1; Though it doesn't affect anything, why do you return