[PATCH 1/3 V2] opensm: connect switches in tree - added new option
Adding new option: '--connect_switches' This option should connect more switches with a down/up routes in up/down and fat-tree routing. Changes from v1: - added option description in osm_subnet.h - added loading option from options file in osm_subnet.c Signed-off-by: Yevgeny Kliteynik klit...@dev.mellanox.co.il --- opensm/include/opensm/osm_subnet.h | 10 -- opensm/man/opensm.8.in | 11 +-- opensm/opensm/main.c | 15 +-- opensm/opensm/osm_subnet.c |7 +++ 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h index b63c97e..095b294 100644 --- a/opensm/include/opensm/osm_subnet.h +++ b/opensm/include/opensm/osm_subnet.h @@ -189,6 +189,7 @@ typedef struct osm_subn_opt { char *routing_engine_names; boolean_t use_ucast_cache; boolean_t connect_roots; + boolean_t connect_switches; char *lid_matrix_dump_file; char *lfts_file; char *root_guid_file; @@ -369,8 +370,13 @@ typedef struct osm_subn_opt { * * connect_roots * The option which will enforce root to root connectivity with -* up/down routing engine (even if this violates pure deadlock -* free up/down algorithm) +* up/down and fat-tree routing engines (even if this violates +* pure deadlock free up/down or fat-tree algorithm) +* +* connect_switches +* The option which will enforce switch to switch connectivity +* with up/down and fat-tree routing engines (even if this +* violates pure deadlock free up/down or fat-tree algorithm) * * use_ucast_cache * When TRUE enables unicast routing cache. 
diff --git a/opensm/man/opensm.8.in b/opensm/man/opensm.8.in index 03002c0..d6fceef 100644 --- a/opensm/man/opensm.8.in +++ b/opensm/man/opensm.8.in @@ -19,6 +19,7 @@ opensm \- InfiniBand subnet manager and administration (SM/SA) [\-\-lash_start_vl vl number] [\-A | \-\-ucast_cache] [\-z | \-\-connect_roots] +[\-\-connect_switches] [\-M file name | \-\-lid_matrix_file file name] [\-U file name | \-\-lfts_file file name] [\-S | \-\-sadb_file file name] @@ -170,8 +171,14 @@ recalculations: one when the host goes down, and the other when the host comes back online. .TP \fB\-z\fR, \fB\-\-connect_roots\fR -This option enforces a routing engine (currently up/down -only) to make connectivity between root switches and in +This option enforces routing engines (up/down and +fat-tree) to make connectivity between root switches and in +this way to be fully IBA complaint. In many cases this can +violate pure deadlock free algorithm, so use it carefully. +.TP +\fB\-\-connect_switches\fR +This option enforces routing engines (up/down and +fat-tree) to make connectivity between all switches and in this way to be fully IBA complaint. In many cases this can violate pure deadlock free algorithm, so use it carefully. .TP diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c index 2e28c83..8175887 100644 --- a/opensm/opensm/main.c +++ b/opensm/opensm/main.c @@ -189,8 +189,14 @@ static void show_usage(void) printf(--sm_sl sl number\n Sets the SL to use to communicate with the SM/SA. Defaults to 0.\n\n); printf(--connect_roots, -z\n -This option enforces a routing engine (currently\n -up/down only) to make connectivity between root switches\n +This option enforces routing engines (up/down and \n +fat-tree) to make connectivity between root switches\n +and in this way be IBA compliant. 
In many cases,\n +this can violate \pure\ deadlock free algorithm, so\n +use it carefully.\n\n); + printf(--connect_switches\n +This option enforces routing engines (up/down and \n +fat-tree) to make connectivity between all the switches\n and in this way be IBA compliant. In many cases,\n this can violate \pure\ deadlock free algorithm, so\n use it carefully.\n\n); @@ -610,6 +616,7 @@ int main(int argc, char *argv[]) {do_mesh_analysis, 0, NULL, 5}, {lash_start_vl, 1, NULL, 6}, {sm_sl, 1, NULL, 7}, + {connect_switches, 0, NULL, 8}, {NULL, 0, NULL, 0} /* Required at the end of the array */ }; @@ -983,6 +990,10 @@ int main(int argc, char *argv[]) opt.sm_sl = (uint8_t) temp; printf( SMSL = %d\n, opt.sm_sl); break; + case 8: + opt.connect_switches = TRUE; + printf( Connect switches option is on\n);
[PATCH] opensm/osm_sa.c: don't ignore failure in osm_mgrp_add_port()
Hi Sasha, Small fix in loading SA DB: don't ignore failure in osm_mgrp_add_port() - require clients re-registration. Signed-off-by: Yevgeny Kliteynik klit...@dev.mellanox.co.il --- opensm/opensm/osm_sa.c |7 --- 1 files changed, 4 insertions(+), 3 deletions(-) diff --git a/opensm/opensm/osm_sa.c b/opensm/opensm/osm_sa.c index a124a45..7b9340d 100644 --- a/opensm/opensm/osm_sa.c +++ b/opensm/opensm/osm_sa.c @@ -1002,9 +1002,10 @@ int osm_sa_db_file_load(osm_opensm_t * p_osm) port = osm_get_port_by_guid(p_osm-subn, guid); if (port cl_qmap_get(p_mgrp-mcm_port_tbl, guid) == - cl_qmap_end(p_mgrp-mcm_port_tbl)) - osm_mgrp_add_port(p_osm-subn, p_osm-log, - p_mgrp, port, mcmr, proxy); + cl_qmap_end(p_mgrp-mcm_port_tbl) + !osm_mgrp_add_port(p_osm-subn, p_osm-log, + p_mgrp, port, mcmr, proxy)) + rereg_clients = 1; } else if (!strncmp(p, Service Record:, 15)) { ib_service_record_t s_rec; uint32_t modified_time, lease_period; -- 1.5.1.4 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] ibutils/ibdm/doc: Fix some typos
Thanks, applied. -- Yevgeny Hal Rosenstock wrote: Signed-off-by: Hal Rosenstock hal.rosenst...@gmail.com --- diff --git a/ibdm/doc/ibdmchk.1 b/ibdm/doc/ibdmchk.1 index 650c9ff..7537473 100644 --- a/ibdm/doc/ibdmchk.1 +++ b/ibdm/doc/ibdmchk.1 @@ -186,7 +186,7 @@ Use enhanced routing algorithm when \s-1LMC\s0 0 and report the resulting path .IX Item -r|--roots roots file A file with all the roots node names (one on each line). .SH VERIFICATION MODE DESCRIPTION .IX Header VERIFICATION MODE DESCRIPTION -After the cluster is built and OpenSM is run (using flag \-D 0x43) it reports the subnet and \s-1FDB\s0 tables into the files osm\-subnet.lst, osm.fdbs and osm.fdbs in /var/log/ (or subnet.lst, osm.fdbs and osm.mcfdbs into /tmp in older OpenSM versions). ibdiagnet is also producing the same files in its outoput directroy. +After the cluster is built and OpenSM is run (using flag \-D 0x43) it reports the subnet and \s-1FDB\s0 tables into the files osm\-subnet.lst, osm.fdbs and osm.fdbs in /var/log/ (or subnet.lst, osm.fdbs and osm.mcfdbs into /tmp in older OpenSM versions). ibdiagnet is also producing the same files in its output directory. Based on these files the utility checks all \s-1CA\s0 to \s-1CA\s0 connectivity. Further analysis for credit deadlock potential is performed and reported. In case of an \s-1LMC\s0 0 it reports histograms for how many systems and nodes are common between the different paths for the same port pairs. .SH ARGUMENTS diff --git a/ibdm/doc/ibdmchk.pod b/ibdm/doc/ibdmchk.pod index e6a2232..dabfafa 100644 --- a/ibdm/doc/ibdmchk.pod +++ b/ibdm/doc/ibdmchk.pod @@ -59,7 +59,7 @@ A file with all the roots node names (one on each line). =head1 VERIFICATION MODE DESCRIPTION -After the cluster is built and OpenSM is run (using flag -D 0x43) it reports the subnet and FDB tables into the files osm-subnet.lst, osm.fdbs and osm.fdbs in /var/log/ (or subnet.lst, osm.fdbs and osm.mcfdbs into /tmp in older OpenSM versions). 
ibdiagnet is also producing the same files in its outoput directroy. +After the cluster is built and OpenSM is run (using flag -D 0x43) it reports the subnet and FDB tables into the files osm-subnet.lst, osm.fdbs and osm.fdbs in /var/log/ (or subnet.lst, osm.fdbs and osm.mcfdbs into /tmp in older OpenSM versions). ibdiagnet is also producing the same files in its output directory. Based on these files the utility checks all CA to CA connectivity. Further analysis for credit deadlock potential is performed and reported. In case of an LMC 0 it reports histograms for how many systems and nodes are common between the different paths for the same port pairs. -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] infiniband-diags/ibqueryerrors: Use remap'ed node name in clear port error message
On 10:22 Tue 27 Oct , Ira Weiny wrote: From: Ira Weiny wei...@llnl.gov Date: Tue, 27 Oct 2009 10:22:36 -0700 Subject: [PATCH] infiniband-diags/ibqueryerrors: Use remap'ed node name in clear port error message Signed-off-by: Ira Weiny wei...@llnl.gov Applied. Thanks. Sasha -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] link-local address fix for rdma_resolve_addr
Jason Gunthorpe wrote: Wow, seriously? You do understand the purpose of review, right? I think I do, maybe not to the depth you and your arguments are, but again, repeating myself: my kind of simple argument is that your review is way beyond the --change-- suggested by a patch but rather of a whole logic, and you block a patch b/c you don't like the logic this patch integrates with. To some extent such practice is accepted, but you took it to way beyond acceptable limit. I don't accept your assertion that the whole logic is broken and it makes sense to me to have a patch from Dave to fix the IPv6 part of it. Next or in parallel you are welcome to send a patch fixing/re-writing the whole bind logic or even the whole rdma stack or the whole kernel. And yes, actually, accounting for how rdma_bind() is different from bind() when doing route resolution is pretty much the main remaining problem go and fix that Or. -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RDMA] Fixup IPv6 support and IPv4 routing corner cases for RDMA CM
Jason Gunthorpe wrote: **COMPILE TESTED ONLY** any reason why other people have to test for you? Convert the address resolution process for outgoing connections to be very similar to the way the TCP stack does the same operations. This fixes many corner case bugs that can crop up. rdma_join_multicast(3) states that before joining a multicast group, the rdma_cm_id must be bound to an RDMA device by calling rdma_bind_addr or rdma_resolve_addr, please make sure that this flow isn't broken by your patch. Or. -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [ofa-general][PATCH 3/4] SRP fail-over faster
On Sat, 2009-10-24 at 03:35 -0400, Vu Pham wrote: It's a big improvement from 3-5 minutes cutting down to 1s and now you talk about device_loss_timeout=0. I'll look at the trade-off to have it; however, to receive and process the async event (port error) already cost you a fair amount of cycles. I agree that it is a great improvement over just sending packets blindly to the link, and waiting for SCSI to time them out -- I've been using the variant of the patches from OFED -- but it is harder to change things once they are in the mainstream kernel, so I'd like to see it done better. And hey, maybe I'm just overly touchy about this. These should be rare events, and there's nothing we can do about the commands sent prior to being told about the link error. It's just that I don't want my file system to stall the petaflop simulation platforms if I can avoid it -- and there's no reason to send any command down the wire once we've been told there is no link or the target is not there. Maybe we don't need to destroy the link immediately, but we need to let the SCSI mid-layer know that things are failing. -- Dave Dillow National Center for Computational Science Oak Ridge National Laboratory (865) 241-6602 office -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] remove extraneous Windows include
On 10:36 Tue 27 Oct , Stan C. Smith wrote: Remove unneeded Windows include. signed-off-by: stan smith stan.sm...@intel.com Applied. Thanks. Sasha -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] opensm code cleanup
On 10:43 Tue 27 Oct , Stan C. Smith wrote: Cleanup code by removing if statement which does nothing. Signed-off-by: stan smith stan.sm...@intel.com Whitespaces are mangled in the patch. Applied by hands. Thanks. Sasha diff --git a/opensm/opensm/osm_vl15intf.c b/opensm/opensm/osm_vl15intf.c index 9e43a9c..ee9626f 100644 --- a/opensm/opensm/osm_vl15intf.c +++ b/opensm/opensm/osm_vl15intf.c @@ -271,9 +271,6 @@ ib_api_status_t osm_vl15_init(IN osm_vl15_t * p_vl, IN osm_vendor_t * p_vend, */ status = cl_thread_init(p_vl-poller, vl15_poller, p_vl, opensm poller); - if (status != IB_SUCCESS) - goto Exit; - Exit: OSM_LOG_EXIT(p_log); return (status); -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] opensm/osm_sa.c: don't ignore failure in osm_mgrp_add_port()
On 11:54 Wed 28 Oct , Yevgeny Kliteynik wrote: Hi Sasha, Small fix in loading SA DB: don't ignore failure in osm_mgrp_add_port() - require clients re-registration. Signed-off-by: Yevgeny Kliteynik klit...@dev.mellanox.co.il Applied. Thanks. Sasha -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RDMA] Fixup IPv6 support and IPv4 routing corner cases for RDMA CM
On Wed, Oct 28, 2009 at 10:05:19AM -0700, Sean Hefty wrote: Can you explain how rdma_resolve_addr is used in conjunction with multicast? I do not understand what the dest would be. Is it just a man page typo? A UD endpoint can communicate using multicast and to other UD endpoints. A user could resolve a UD endpoint before joining a multicast group. So the IP world analog would be: fd = socket(AF_INET,SOCK_DGRAM); connect(fd,'Some Unicast Address'); setsockopt(fd,IP_MULITCAST_ADD_MEMBERSHIP,'Some Multicast Address'); sendto(fd,...,'Some Multicast Address'); ? I think that is still OK. The routines still bind the rdma cm_id to the devices via rdma_translate_ip pretty much like they did before. There is no support for Linux IP multicast routing though.. Jason -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [ofa-general][PATCH 3/4] SRP fail-over faster
+if (timer_pending(target-qp_err_timer)) +del_timer_sync(target-qp_err_timer); + spin_unlock_irq(target-scsi_host-host_lock); As was pointed out, I don't think you can do del_timer_sync while holding the lock, since the timer function takes the same lock. But I don't know that just switching to del_timer without the sync works here ... without the sync then the timeout function could still run any time after the del_timer, even after everything gets freed. BTW the test of timer_pending isn't needed here... del_timer does the test internally anyway. I do agree it would be very good to improve the SRP error handling. I have some concerns about the overall design here -- it seems that if we handle connection failure and allow a new connection to proceed while cleaning up asynchronously, then this opens the door to a lot of complexity, and I don't see that complexity handled in this patchset. For example, the new connection could fail too before the old one is done cleaning up, etc, etc and we end up with an arbitrarily large queue of things waiting to clean up. Or maybe it really is simpler than that. I think the best way to move this forward would be to post another cleaned up version of your patch set. Please try to reorganize things so each patch is reasonably self contained. Of course your patchset is taking multiple steps to improve things. But as much as possible, please try to avoid combining two things into a single patch, and conversely also try to avoid putting things into a patch that don't make sense without a later patch. Avoiding policy in the kernel as much as possible in terms of hard-coded timeouts etc is a good goal too. Also it would help to give each patch a separate descriptive subject, and put as much detail in the changelogs as you can. Thanks, Roland -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[GIT PULL] please pull infiniband.git
Linus, please pull from master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This tree is also available from kernel.org mirrors at: git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This will just get one PCI device ID addition: Eli Cohen (1): mlx4_core: Add a new supported 40 GigE device ID drivers/net/mlx4/main.c |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 5dd7225..291a505 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -1282,6 +1282,7 @@ static struct pci_device_id mlx4_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x6372) }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */ { PCI_VDEVICE(MELLANOX, 0x675a) }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */ { PCI_VDEVICE(MELLANOX, 0x6764) }, /* MT26468 ConnectX EN 10GigE PCIe gen2*/ + { PCI_VDEVICE(MELLANOX, 0x6746) }, /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */ { PCI_VDEVICE(MELLANOX, 0x676e) }, /* MT26478 ConnectX2 40GigE PCIe gen2 */ { 0, } }; -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Adjusting minimum packet size or wait to merge requests in SRP
It appears that SRP tries to coalesce and fragment initiator I/O requests into 64KB packets, as that looks to be the size requested to/from the device on the target side (and the I/O scheduler is disabled on the target). Is there a way to control this, where no coalescing occurs when latency is an issue and requests are small, and no fragmentation occurs when requests are large? Or, am I totally wrong in my assumption that SRP is coalescing/fragmenting data? Thanks, Chris -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Adjusting minimum packet size or wait to merge requests in SRP
On Wed, Oct 28, 2009 at 7:47 PM, Chris Worley worl...@gmail.com wrote: It appears that SRP tries to coalesce and fragment initiator I/O requests into 64KB packets, as that looks to be the size requested to/from the device on the target side (and the I/O scheduler is disabled on the target). Is there a way to control this, where no coalescing occurs when latency is an issue and requests are small, and no fragmentation occurs when requests are large? Or, am I totally wrong in my assumption that SRP is coalescing/fragmenting data? Regarding avoiding coalescing of I/O requests: which I/O scheduler is being used on the initiator system and how has it been configured via sysfs ? Adjusting the constant MAX_RDMA_SIZE in scst/srpt/src/ib_srpt.h might help to avoid fragmentation of large requests by the SRP protocol. Please post a follow-up message to the mailing list with your findings such that MAX_RDMA_SIZE can be converted from a compile-time constant to a sysfs variable if this would be useful. Bart. -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH v3] [RFC] rdma/cm: support option to allow manually setting IB path
But, I still think this API should return EINVAL if the cm_id is in AF_INET/AF_INET6 mode. That is to say, this API only works with the AF_IB idea we have been discussing. I suggest this because using this API really does override the capabilities of the AF_INET/6 in unexpected ways, as the discussion drifted through it seemed like at least bonding, routing and ND operations would/could be overridden. If so then I'd say it should be part of an AF_IB patch. Sean, what are your thoughts on applying it to AF_INET/6? Even without any other kernel changes, this patch enables us to solve the biggest scaling problem that we've measured, so I want to allow it regardless of what the original addressing was. Whether a path record comes from the SA, a local cache, some wonky multicast protocol, or is made up is really independent from how the GIDs were discovered. - Sean -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Adjusting minimum packet size or wait to merge requests in SRP
On Wed, Oct 28, 2009 at 1:14 PM, Bart Van Assche bart.vanass...@gmail.com wrote: On Wed, Oct 28, 2009 at 7:47 PM, Chris Worley worl...@gmail.com wrote: It appears that SRP tries to coalesce and fragment initiator I/O requests into 64KB packets, as that looks to be the size requested to/from the device on the target side (and the I/O scheduler is disabled on the target). Is there a way to control this, where no coalescing occurs when latency is an issue and requests are small, and no fragmentation occurs when requests are large? Or, am I totally wrong in my assumption that SRP is coalescing/fragmenting data? Regarding avoiding coalescing of I/O requests: which I/O scheduler is being used on the initiator system and how has it been configured via sysfs ? There is no scheduler running on either target or initiator on the drives in question (sorry I worded that incorrectly initially), or so I've been told (this information is second-hand). I did see iostat output from the initiator in his case, where there were long waits and service times that I'm guessing was due to some coalescing/merging. There was also a hint in the iostat output that a scheduler was enabled, as there were non-zero values (occasionally) under the [rw]qm/s columns, which, if I understand iostat correctly, means there is a scheduler merging results. So you're saying there is no hold-off for merging on the initiator side of the IB/SRP stack? Adjusting the constant MAX_RDMA_SIZE in scst/srpt/src/ib_srpt.h might help to avoid fragmentation of large requests by the SRP protocol. Please post a follow-up message to the mailing list with your findings such that MAX_RDMA_SIZE can be converted from a compile-time constant to a sysfs variable if this would be useful. Will do. Thanks, Chris Bart. -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Adjusting minimum packet size or wait to merge requests in SRP
It appears that SRP tries to coalesce and fragment initiator I/O requests into 64KB packets, as that looks to be the size requested to/from the device on the target side (and the I/O scheduler is disabled on the target). There is no code in the SRP initiator that does anything to change IO requests that I know of. So I think this is happening somewhere higher in the stack. - R. -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3] [RFC] rdma/cm: support option to allow manually setting IB path
On Wed, Oct 28, 2009 at 02:41:15PM -0700, Sean Hefty wrote: Does a DGID returning API already exist? yes - query_route returns the following information: SGID, DGID, pkey, source address, destination address, and path records (max of 2). Not all of the information is valid, depending on the state of the rdma cm id. The librdmacm already invokes this after rdma_resolve_addr completes. Great, I didn't realize that was there. No further comments from me then Reviewed-By: Jason Gunthorpe jguntho...@obsidianresearch.com Jason -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/5] uDAPL v2: ucm: increase timers during subsequent retries, add create_ah error checking
- increase timers during subsequent retries, - check/process create_ah errors during connect phase, - cleanup some debug messaging. Signed-off-by: Arlin Davis arlin.r.da...@intel.com --- dapl/openib_ucm/cm.c | 81 ++--- 1 files changed, 43 insertions(+), 38 deletions(-) diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c index 96ee382..07b8458 100644 --- a/dapl/openib_ucm/cm.c +++ b/dapl/openib_ucm/cm.c @@ -163,17 +163,16 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer) *timer = cm-hca-ib_trans.cm_timer; /* wait longer each retry */ if ((time - cm-timer)/1000 - (cm-hca-ib_trans.rep_time * cm-retries)) { + (cm-hca-ib_trans.rep_time cm-retries)) { dapl_log(DAPL_DBG_TYPE_WARN, CM_REQ retry %d [lid, port, qpn]: - %x %x %x - %x %x %x \n, -cm-retries, -ntohs(cm-msg.saddr.ib.lid), -ntohs(cm-msg.sport), -ntohl(cm-msg.saddr.ib.qpn), -ntohs(cm-msg.daddr.ib.lid), -ntohs(cm-msg.dport), -ntohl(cm-msg.dqpn)); + %x %x %x - %x %x %x Time(ms) %llu %llu\n, +cm-retries, ntohs(cm-msg.saddr.ib.lid), +ntohs(cm-msg.sport), ntohl(cm-msg.saddr.ib.qpn), +ntohs(cm-msg.daddr.ib.lid), ntohs(cm-msg.dport), +ntohl(cm-msg.dqpn), (time - cm-timer)/1000, +cm-hca-ib_trans.rep_time cm-retries); + cm-retries++; dapl_os_unlock(cm-lock); dapli_cm_connect(cm-ep, cm); return; @@ -182,10 +181,10 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer) case DCM_RTU_PENDING: *timer = cm-hca-ib_trans.cm_timer; if ((time - cm-timer)/1000 - (cm-hca-ib_trans.rtu_time * cm-retries)) { + (cm-hca-ib_trans.rtu_time cm-retries)) { dapl_log(DAPL_DBG_TYPE_WARN, CM_REPLY retry %d [lid, port, qpn]: - %x %x %x - %x %x %x r_pid %x,%d\n, + %x %x %x - %x %x %x r_pid %x,%d Time(ms) %llu %llu\n, cm-retries, ntohs(cm-msg.saddr.ib.lid), ntohs(cm-msg.sport), @@ -194,7 +193,9 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer) ntohs(cm-msg.dport), ntohl(cm-msg.daddr.ib.qpn), ntohl(*(DAT_UINT32*)cm-msg.resv), -ntohl(*(DAT_UINT32*)cm-msg.resv)); +ntohl(*(DAT_UINT32*)cm-msg.resv), +(time - 
cm-timer)/1000, cm-hca-ib_trans.rtu_time cm-retries); + cm-retries++; dapl_os_unlock(cm-lock); ucm_reply(cm); return; @@ -204,10 +205,10 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer) *timer = cm-hca-ib_trans.cm_timer; /* wait longer each retry */ if ((time - cm-timer)/1000 - (cm-hca-ib_trans.rep_time)) { + (cm-hca-ib_trans.rtu_time cm-retries)) { dapl_log(DAPL_DBG_TYPE_WARN, CM_DREQ retry %d [lid, port, qpn]: - %x %x %x - %x %x %x r_pid %x,%d\n, + %x %x %x - %x %x %x r_pid %x,%d Time(ms) %llu %llu\n, cm-retries, ntohs(cm-msg.saddr.ib.lid), ntohs(cm-msg.sport), @@ -216,7 +217,9 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer) ntohs(cm-msg.dport), ntohl(cm-msg.dqpn), ntohl(*(DAT_UINT32*)cm-msg.resv), -ntohl(*(DAT_UINT32*)cm-msg.resv)); +ntohl(*(DAT_UINT32*)cm-msg.resv), +(time - cm-timer)/1000, cm-hca-ib_trans.rtu_time cm-retries); + cm-retries++; dapl_os_unlock(cm-lock); dapli_cm_disconnect(cm); return; @@ -448,8 +451,8 @@ retry_listenq: } else {
[PATCH 3/5] uDAPL v2: ucm: change some debug message levels, check for valid UD REPLY during retries
Signed-off-by: Arlin Davis arlin.r.da...@intel.com --- dapl/openib_ucm/cm.c | 38 +- 1 files changed, 29 insertions(+), 9 deletions(-) diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c index 07b8458..b28e911 100644 --- a/dapl/openib_ucm/cm.c +++ b/dapl/openib_ucm/cm.c @@ -395,12 +395,22 @@ static void ucm_process_recv(ib_hca_transport_t *tp, } dapl_os_unlock(cm-lock); break; - + case DCM_RELEASED: + /* UD reply retried, ignore */ + if (ntohs(msg-op) != DCM_REP) { + dapl_log(DAPL_DBG_TYPE_WARN, +ucm_recv: UNKNOWN operation +- op %d, %s spsp %d sqpn %d\n, + ntohs(msg-op), dapl_cm_state_str(cm-state), + ntohs(msg-sport), ntohl(msg-sqpn)); + } + dapl_os_unlock(cm-lock); + break; default: dapl_log(DAPL_DBG_TYPE_WARN, ucm_recv: UNKNOWN state -- op %d, st %d spsp %d sqpn %d\n, - ntohs(msg-op), cm-state, +- op %d, %s spsp %d sqpn %d\n, + ntohs(msg-op), dapl_cm_state_str(cm-state), ntohs(msg-sport), ntohl(msg-sqpn)); dapl_os_unlock(cm-lock); break; @@ -479,7 +489,7 @@ retry_listenq: /* not match on listenq for valid request, send reject */ if (ntohs(msg-op) == DCM_REQ !found) ucm_reject(tp, msg); -#if DAPL_DBG + if (!found) { dapl_log(DAPL_DBG_TYPE_WARN, ucm_recv: NO MATCH op %s 0x%x %d i0x%x c0x%x @@ -490,7 +500,7 @@ retry_listenq: ntohs(msg-saddr.ib.lid), ntohs(msg-sport), ntohl(msg-saddr.ib.qpn)); } -#endif + return found; } @@ -813,8 +823,18 @@ DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm) case DCM_DISC_PENDING: /* DREQ timeout, resend until retries exhausted */ cm-msg.op = htons(DCM_DREQ); - if (cm-retries = cm-hca-ib_trans.retries) + if (cm-retries = cm-hca-ib_trans.retries) { + dapl_log(DAPL_DBG_TYPE_ERR, +CM_DREQ: RETRIES EXHAUSTED: +0x%x %d 0x%x - 0x%x %d 0x%x\n, + htons(cm-msg.saddr.ib.lid), + htonl(cm-msg.saddr.ib.qpn), + htons(cm-msg.sport), + htons(cm-msg.daddr.ib.lid), + htonl(cm-msg.dqpn), + htons(cm-msg.dport)); finalize = 1; + } break; case DCM_DISC_RECV: /* DREQ received, send DREP and schedule event */ @@ -857,7 +877,7 @@ 
dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm) } if (cm-retries == cm-hca-ib_trans.retries) { - dapl_log(DAPL_DBG_TYPE_WARN, + dapl_log(DAPL_DBG_TYPE_ERR, CM_REQ: RETRIES EXHAUSTED: 0x%x %d 0x%x - 0x%x %d 0x%x\n, htons(cm-msg.saddr.ib.lid), @@ -1289,8 +1309,8 @@ static int ucm_reply(dp_ib_cm_handle_t cm) } if (cm-retries == cm-hca-ib_trans.retries) { - dapl_log(DAPL_DBG_TYPE_WARN, - CM_REP: RETRIES EXHAUSTED + dapl_log(DAPL_DBG_TYPE_ERR, + CM_REPLY: RETRIES EXHAUSTED 0x%x %d 0x%x - 0x%x %d 0x%x\n, htons(cm-msg.saddr.ib.lid), htons(cm-msg.sport), -- 1.5.2.5 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/5] uDAPL v2: Patch series for ucm, scm: fixes for issues discovered during scale-up, out testing
Linux testing completed with Intel MPI/HPCC benchmarks on 128 nodes, 1024 cores. ucm, scm: address handles need destroyed when freeing Endpoints with UD QP's. Signed-off-by: Arlin Davis arlin.r.da...@intel.com --- dapl/openib_scm/cm.c |4 dapl/openib_ucm/cm.c |6 ++ dapl/openib_ucm/dapl_ib_util.h |1 + dapl/openib_ucm/device.c | 16 ++-- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c index 453e32e..0d2d058 100644 --- a/dapl/openib_scm/cm.c +++ b/dapl/openib_scm/cm.c @@ -355,6 +355,10 @@ multi_cleanup: dapl_os_lock(cr-lock); hca_ptr = cr-hca; cr-ep = NULL; + if (cr-ah) { + ibv_destroy_ah(cr-ah); + cr-ah = NULL; + } cr-state = DCM_DESTROY; dapl_os_unlock(cr-lock); } diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c index cc480c4..96ee382 100644 --- a/dapl/openib_ucm/cm.c +++ b/dapl/openib_ucm/cm.c @@ -679,6 +679,10 @@ static void ucm_ud_free(DAPL_EP *ep) dapl_os_lock(cm-lock); hca = cm-hca; cm-ep = NULL; + if (cm-ah) { + ibv_destroy_ah(cm-ah); + cm-ah = NULL; + } cm-state = DCM_DESTROY; dapl_os_unlock(cm-lock); } @@ -1041,6 +1045,7 @@ ud_bail: event = IB_CME_LOCAL_FAILURE; goto bail; } + cm-ah = xevent.remote_ah.ah; /* keep ref to destroy */ dapl_os_memcpy(xevent.remote_ah.ia_addr, cm-msg.daddr, @@ -1218,6 +1223,7 @@ static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg) if (xevent.remote_ah.ah == NULL) goto bail; + cm-ah = xevent.remote_ah.ah; /* keep ref to destroy */ dapl_os_memcpy(xevent.remote_ah.ia_addr, cm-msg.daddr, sizeof(union dcm_addr)); diff --git a/dapl/openib_ucm/dapl_ib_util.h b/dapl/openib_ucm/dapl_ib_util.h index 27ff8dd..6273459 100644 --- a/dapl/openib_ucm/dapl_ib_util.h +++ b/dapl/openib_ucm/dapl_ib_util.h @@ -43,6 +43,7 @@ struct ib_cm_handle struct dapl_hca *hca; struct dapl_sp *sp; struct dapl_ep *ep; + struct ibv_ah *ah; uint16_tp_size; /* accept p_data, for retries */ uint8_t p_data[DCM_MAX_PDATA_SIZE]; ib_cm_msg_t msg; diff --git a/dapl/openib_ucm/device.c 
b/dapl/openib_ucm/device.c index 077446b..e890eef 100644 --- a/dapl/openib_ucm/device.c +++ b/dapl/openib_ucm/device.c @@ -434,14 +434,18 @@ static void ucm_service_destroy(IN DAPL_HCA *hca) if (tp-rch) ibv_destroy_comp_channel(tp-rch); -dapl_log(DAPL_DBG_TYPE_UTIL, - destroy_service: pd %p ctx %p handle 0x%x\n, - tp-pd, tp-pd-context, tp-pd-handle); - if (tp-pd) - ibv_dealloc_pd(tp-pd); + if (tp-ah) { + int i; - if (tp-ah) + for (i = 0;i 0x; i++) { + if (tp-ah[i]) + ibv_destroy_ah(tp-ah[i]); + } dapl_os_free(tp-ah, (sizeof(*tp-ah) * 0x)); + } + + if (tp-pd) + ibv_dealloc_pd(tp-pd); if (tp-sid) dapl_os_free(tp-sid, (sizeof(*tp-sid) * 0x)); -- 1.5.2.5 -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH libibverbs] Make ibv_get_device_list report error codes via errno
- Suppress fprintf to stderr in sundry cases like no verbs drivers loaded - Fix double free in find_sysfs_devs if ibv_read_sysfs_file fails (unlikely) - Update all example programs and man page Code that expects this behavior but links against an old libibverbs will get the old fprintf, with errno set to garbage (probably ESPIPE). Signed-off-by: Jason Gunthorpe jguntho...@obsidianresearch.com --- examples/asyncwatch.c |2 +- examples/device_list.c|2 +- examples/devinfo.c|4 ++-- examples/rc_pingpong.c|2 +- examples/srq_pingpong.c |2 +- examples/uc_pingpong.c|2 +- examples/ud_pingpong.c|2 +- man/ibv_get_device_list.3 | 16 +++- src/device.c | 21 +++-- src/init.c| 38 ++ 10 files changed, 56 insertions(+), 35 deletions(-) The double free thing could be split out if you want, I just caught it while working on this. Works like this: $ build/examples/ibv_devinfo Failed to get IB devices list: Function not implemented $ build/examples/ibv_devinfo No IB devices found diff --git a/examples/asyncwatch.c b/examples/asyncwatch.c index 16aee2c..e56b4dc 100644 --- a/examples/asyncwatch.c +++ b/examples/asyncwatch.c @@ -82,7 +82,7 @@ int main(int argc, char *argv[]) dev_list = ibv_get_device_list(NULL); if (!dev_list) { - fprintf(stderr, No IB devices found\n); + perror(Failed to get IB devices list); return 1; } diff --git a/examples/device_list.c b/examples/device_list.c index 3ce8cbd..70c3af5 100644 --- a/examples/device_list.c +++ b/examples/device_list.c @@ -49,7 +49,7 @@ int main(int argc, char *argv[]) dev_list = ibv_get_device_list(num_devices); if (!dev_list) { - fprintf(stderr, No IB devices found\n); + perror(Failed to get IB devices list); return 1; } diff --git a/examples/devinfo.c b/examples/devinfo.c index caa5d5f..33d1a9b 100644 --- a/examples/devinfo.c +++ b/examples/devinfo.c @@ -361,7 +361,7 @@ int main(int argc, char *argv[]) case 'l': dev_list = orig_dev_list = ibv_get_device_list(num_of_hcas); if (!dev_list) { - fprintf(stderr, Failed to get IB devices list); + perror(Failed to 
get IB devices list); return -1; } @@ -387,7 +387,7 @@ int main(int argc, char *argv[]) dev_list = orig_dev_list = ibv_get_device_list(NULL); if (!dev_list) { - fprintf(stderr, Failed to get IB device list\n); + perror(Failed to get IB devices list); return -1; } diff --git a/examples/rc_pingpong.c b/examples/rc_pingpong.c index d4115e4..fa969e0 100644 --- a/examples/rc_pingpong.c +++ b/examples/rc_pingpong.c @@ -593,7 +593,7 @@ int main(int argc, char *argv[]) dev_list = ibv_get_device_list(NULL); if (!dev_list) { - fprintf(stderr, No IB devices found\n); + perror(Failed to get IB devices list); return 1; } diff --git a/examples/srq_pingpong.c b/examples/srq_pingpong.c index e47bae6..1e36c57 100644 --- a/examples/srq_pingpong.c +++ b/examples/srq_pingpong.c @@ -682,7 +682,7 @@ int main(int argc, char *argv[]) dev_list = ibv_get_device_list(NULL); if (!dev_list) { - fprintf(stderr, No IB devices found\n); + perror(Failed to get IB devices list); return 1; } diff --git a/examples/uc_pingpong.c b/examples/uc_pingpong.c index 404b059..6f31247 100644 --- a/examples/uc_pingpong.c +++ b/examples/uc_pingpong.c @@ -581,7 +581,7 @@ int main(int argc, char *argv[]) dev_list = ibv_get_device_list(NULL); if (!dev_list) { - fprintf(stderr, No IB devices found\n); + perror(Failed to get IB devices list); return 1; } diff --git a/examples/ud_pingpong.c b/examples/ud_pingpong.c index 8f3d50b..6f10212 100644 --- a/examples/ud_pingpong.c +++ b/examples/ud_pingpong.c @@ -580,7 +580,7 @@ int main(int argc, char *argv[]) dev_list = ibv_get_device_list(NULL); if (!dev_list) { - fprintf(stderr, No IB devices found\n); + perror(Failed to get IB devices list); return 1; } diff --git a/man/ibv_get_device_list.3 b/man/ibv_get_device_list.3 index 003fffb..16cc1a0 100644 --- a/man/ibv_get_device_list.3 +++ b/man/ibv_get_device_list.3 @@ -25,10 +25,24 @@ returned by .B ibv_get_device_list()\fR. 
.SH RETURN VALUE .B ibv_get_device_list() -returns the array of available RDMA devices, or NULL if the request fails. +returns the array of available RDMA devices, or sets +.I errno +and returns NULL if the request fails. If no
[infiniband-diags] [PATCH] [2/2] remove 'dist' field from ibnd_node_t, which was virtually not used
Remove the 'dist' field from the ibnd_node_t struct and rework the code accordingly. It turns out this field was only used to pass a value from create_node() to add_to_nodedist(), and create_node() is the only function that calls add_to_nodedist(). In other words, it served pretty much no purpose. Al -- Albert Chu ch...@llnl.gov Computer Scientist High Performance Systems Division Lawrence Livermore National Laboratory From: Albert Chu ch...@llnl.gov Date: Wed, 28 Oct 2009 16:18:39 -0700 Subject: [PATCH] remove 'dist' field from ibnd_node_t, which was virtually not used Signed-off-by: Albert Chu ch...@llnl.gov --- .../libibnetdisc/include/infiniband/ibnetdisc.h|1 - infiniband-diags/libibnetdisc/src/ibnetdisc.c |6 ++ 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h index 8303175..a8d290c 100644 --- a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h +++ b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h @@ -48,7 +48,6 @@ typedef struct ibnd_node { struct ibnd_node *next; /* all node list in fabric */ ib_portid_t path_portid;/* path from from_node */ - int dist; /* num of hops from from_node */ int smalid; int smalmc; diff --git a/infiniband-diags/libibnetdisc/src/ibnetdisc.c b/infiniband-diags/libibnetdisc/src/ibnetdisc.c index b25c3d0..047b705 100644 --- a/infiniband-diags/libibnetdisc/src/ibnetdisc.c +++ b/infiniband-diags/libibnetdisc/src/ibnetdisc.c @@ -387,9 +387,8 @@ static void add_to_type_list(ibnd_node_t * node, ibnd_fabric_t * fabric) } } -static void add_to_nodedist(ibnd_node_t * node, ibnd_scan_t * ibnd_scan) +static void add_to_nodedist(ibnd_node_t * node, ibnd_scan_t * ibnd_scan, int dist) { - int dist = node-dist; if (node-type != IB_NODE_SWITCH) dist = MAXHOPS; /* special Ca list */ @@ -410,7 +409,6 @@ static ibnd_node_t *create_node(ibnd_fabric_t * fabric, ibnd_scan_t * ibnd_scan, } 
memcpy(node, temp, sizeof(*node)); - node-dist = dist; node-path_portid = *path; add_to_nodeguid_hash(node, fabric-nodestbl); @@ -420,7 +418,7 @@ static ibnd_node_t *create_node(ibnd_fabric_t * fabric, ibnd_scan_t * ibnd_scan, fabric-nodes = (ibnd_node_t *) node; add_to_type_list(node, fabric); - add_to_nodedist(node, ibnd_scan); + add_to_nodedist(node, ibnd_scan, dist); return node; } -- 1.5.4.5
[infiniband-diags] [PATCH] [1/2] split out ibnd_fabric_t fields that are only used during a scan
Split out public parameters from ibnd_fabric_t that are useless b/c they are only used during the ibnetdiscover scan. Note that this patch has similarities to a previous patch from Ira, however it is separate and independent of that patch series. Al -- Albert Chu ch...@llnl.gov Computer Scientist High Performance Systems Division Lawrence Livermore National Laboratory From: Albert Chu ch...@llnl.gov Date: Tue, 27 Oct 2009 16:16:14 -0700 Subject: [PATCH] split out ibnd_fabric_t fields that are only used during a scan Signed-off-by: Albert Chu ch...@llnl.gov --- .../libibnetdisc/include/infiniband/ibnetdisc.h|6 -- infiniband-diags/libibnetdisc/src/chassis.c| 68 infiniband-diags/libibnetdisc/src/chassis.h|2 +- infiniband-diags/libibnetdisc/src/ibnetdisc.c | 60 + infiniband-diags/libibnetdisc/src/internal.h | 10 +++ 5 files changed, 83 insertions(+), 63 deletions(-) diff --git a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h index c55ce00..8303175 100644 --- a/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h +++ b/infiniband-diags/libibnetdisc/include/infiniband/ibnetdisc.h @@ -124,7 +124,6 @@ typedef struct ibnd_chassis { } ibnd_chassis_t; #define HTSZ 137 -#define MAXHOPS63 /** = * Fabric @@ -145,14 +144,9 @@ typedef struct ibnd_fabric { /* internal use only */ ibnd_node_t *nodestbl[HTSZ]; ibnd_port_t *portstbl[HTSZ]; - ibnd_node_t *nodesdist[MAXHOPS + 1]; - ibnd_chassis_t *first_chassis; - ibnd_chassis_t *current_chassis; - ibnd_chassis_t *last_chassis; ibnd_node_t *switches; ibnd_node_t *ch_adapters; ibnd_node_t *routers; - ib_portid_t selfportid; } ibnd_fabric_t; /** = diff --git a/infiniband-diags/libibnetdisc/src/chassis.c b/infiniband-diags/libibnetdisc/src/chassis.c index 4886cfc..5043f42 100644 --- a/infiniband-diags/libibnetdisc/src/chassis.c +++ b/infiniband-diags/libibnetdisc/src/chassis.c @@ -96,7 +96,7 @@ static ibnd_chassis_t *find_chassisnum(ibnd_fabric_t * fabric, 
{ ibnd_chassis_t *current; - for (current = fabric-first_chassis; current; current = current-next) { + for (current = fabric-chassis; current; current = current-next) { if (current-chassisnum == chassisnum) return current; } @@ -214,7 +214,7 @@ static ibnd_chassis_t *find_chassisguid(ibnd_fabric_t * fabric, uint64_t chguid; chguid = get_chassisguid(node); - for (current = fabric-first_chassis; current; current = current-next) { + for (current = fabric-chassis; current; current = current-next) { if (current-chassisguid == chguid) return current; } @@ -782,19 +782,19 @@ static void voltaire_portmap(ibnd_port_t * port) port-ext_portnum = int2ext_map_slb8[chipnum][portnum]; } -static int add_chassis(ibnd_fabric_t * fabric) +static int add_chassis(ibnd_scan_t *ibnd_scan) { - if (!(fabric-current_chassis = calloc(1, sizeof(ibnd_chassis_t { + if (!(ibnd_scan-current_chassis = calloc(1, sizeof(ibnd_chassis_t { IBND_ERROR(OOM: failed to allocate chassis object\n); return (-1); } - if (fabric-first_chassis == NULL) { - fabric-first_chassis = fabric-current_chassis; - fabric-last_chassis = fabric-current_chassis; + if (ibnd_scan-first_chassis == NULL) { + ibnd_scan-first_chassis = ibnd_scan-current_chassis; + ibnd_scan-last_chassis = ibnd_scan-current_chassis; } else { - fabric-last_chassis-next = fabric-current_chassis; - fabric-last_chassis = fabric-current_chassis; + ibnd_scan-last_chassis-next = ibnd_scan-current_chassis; + ibnd_scan-last_chassis = ibnd_scan-current_chassis; } return (0); } @@ -818,33 +818,35 @@ static void add_node_to_chassis(ibnd_chassis_t * chassis, ibnd_node_t * node) Returns: 0 on success, -1 on failure */ -int group_nodes(ibnd_fabric_t * fabric) +int group_nodes(ibnd_fabric_t * fabric, ibnd_scan_t *ibnd_scan) { ibnd_node_t *node; int dist; int chassisnum = 0; ibnd_chassis_t *chassis; + ibnd_chassis_t *ch, *ch_next; - fabric-first_chassis = NULL; - fabric-current_chassis = NULL; + ibnd_scan-first_chassis = NULL; + ibnd_scan-current_chassis = NULL; + 
ibnd_scan-last_chassis = NULL; /* first pass on switches and build for every Voltaire node */ /* an appropriate chassis record (slotnum and position) */