Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package libfabric for openSUSE:Factory checked in at 2021-04-08 21:01:51 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/libfabric (Old) and /work/SRC/openSUSE:Factory/.libfabric.new.2401 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "libfabric" Thu Apr 8 21:01:51 2021 rev:26 rq:882724 version:1.12.1 Changes: -------- --- /work/SRC/openSUSE:Factory/libfabric/fabtests.changes 2021-03-16 15:43:58.568996062 +0100 +++ /work/SRC/openSUSE:Factory/.libfabric.new.2401/fabtests.changes 2021-04-08 21:02:05.981896371 +0200 @@ -1,0 +2,11 @@ +Fri Apr 2 07:30:34 UTC 2021 - Nicolas Morey-Chaisemartin <nmoreychaisemar...@suse.com> + +- Update to 1.12.1 + - Fix initialization checks for CUDA HMEM support + - Fail if a memory monitor is requested but not available + - Adjust priority of psm3 provider to prefer HW specific providers, + such as efa and psm2 + - EFA and PSM3 fixes + - See NEWS.md for changelog + +------------------------------------------------------------------- libfabric.changes: same change Old: ---- libfabric-1.12.0.0.b5c35d115b31.tar.bz2 New: ---- libfabric-1.12.1.0.08c7a6af92d3.tar.bz2 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ fabtests.spec ++++++ --- /var/tmp/diff_new_pack.Esy90D/_old 2021-04-08 21:02:06.781897235 +0200 +++ /var/tmp/diff_new_pack.Esy90D/_new 2021-04-08 21:02:06.785897240 +0200 @@ -16,10 +16,10 @@ # -%define git_ver .0.b5c35d115b31 +%define git_ver .0.08c7a6af92d3 Name: fabtests -Version: 1.12.0 +Version: 1.12.1 Release: 0 Summary: Test suite for libfabric API License: BSD-2-Clause OR GPL-2.0-only ++++++ libfabric.spec ++++++ --- /var/tmp/diff_new_pack.Esy90D/_old 2021-04-08 21:02:06.809897265 +0200 +++ /var/tmp/diff_new_pack.Esy90D/_new 2021-04-08 21:02:06.813897270 +0200 @@ -17,10 +17,10 @@ # -%define git_ver .0.b5c35d115b31 +%define git_ver .0.08c7a6af92d3 Name: libfabric -Version: 1.12.0 +Version: 1.12.1 Release: 0 Summary: User-space RDMA Fabric Interfaces License: BSD-2-Clause OR GPL-2.0-only ++++++ _service ++++++ --- /var/tmp/diff_new_pack.Esy90D/_old 2021-04-08 21:02:06.853897313 +0200 +++ /var/tmp/diff_new_pack.Esy90D/_new 2021-04-08 21:02:06.853897313 +0200 @@ -8,7 +8,7 @@ <param name="versionformat">@PARENT_TAG@.@TAG_OFFSET@.%h</param> <param name="versionrewrite-pattern">v(.*)</param> <param name="versionrewrite-replacement">\1</param> - <param name="revision">b5c35d115b31af377d90769a2d1c8302a1d44b31</param> + <param name="revision">08c7a6af92d35b29c5b120f43b24dbdba7becaf5</param> </service> <service name="recompress" mode="disabled"> <param name="file">libfabric*.tar</param> ++++++ libfabric-1.12.0.0.b5c35d115b31.tar.bz2 -> libfabric-1.12.1.0.08c7a6af92d3.tar.bz2 ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/Makefile.am new/libfabric-1.12.1.0.08c7a6af92d3/Makefile.am --- old/libfabric-1.12.0.0.b5c35d115b31/Makefile.am 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/Makefile.am 2021-04-01 23:01:34.000000000 +0200 @@ -185,7 +185,7 @@ src_libfabric_la_DEPENDENCIES = libfabric.map if !EMBEDDED -src_libfabric_la_LDFLAGS += -version-info 16:0:15 +src_libfabric_la_LDFLAGS += -version-info 16:1:15 endif src_libfabric_la_LDFLAGS += -export-dynamic \ $(libfabric_version_script) @@ -425,6 +425,7 @@ man_MANS = $(real_man_pages) $(prov_install_man_pages) $(dummy_man_pages) EXTRA_DIST += \ + autogen.sh \ NEWS.md \ libfabric.spec.in \ config/distscript.pl \ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/NEWS.md new/libfabric-1.12.1.0.08c7a6af92d3/NEWS.md --- old/libfabric-1.12.0.0.b5c35d115b31/NEWS.md 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/NEWS.md 2021-04-01 23:01:34.000000000 +0200 @@ -6,6 +6,33 @@ version 1.0. New major releases include all fixes from minor releases with earlier release dates. +v1.12.1, Thu Apr 1, 2021 +======================== + +## Core + +- Fix initialization checks for CUDA HMEM support +- Fail if a memory monitor is requested but not available +- Adjust priority of psm3 provider to prefer HW specific providers, + such as efa and psm2 + +## EFA +- Adjust timing clearing the deferred MR list to fix memory leak +- Repost handshake packets on EAGAIN failure +- Enable mr cache for CUDA memory +- Support FI_HMEM and FI_LOCAL_COMM when used together +- Skip using shm provider when FI_HMEM is requested + +## PSM3 +- Fix AVX2 configure check +- Fix conflict with with-psm2-src build option to prevent duplicate + symbols +- Fix checksum generation to support different builddir +- Remove dependency on librdmacm header files +- Use AR variable instead of calling ar directly in automake tools +- Add missing PACK_SUFFIX to header + + v1.12.0, Mon Mar 8, 2021 ========================= diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/config/fi_strip_optflags.m4 new/libfabric-1.12.1.0.08c7a6af92d3/config/fi_strip_optflags.m4 --- old/libfabric-1.12.0.0.b5c35d115b31/config/fi_strip_optflags.m4 1970-01-01 01:00:00.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/config/fi_strip_optflags.m4 2021-04-01 23:01:34.000000000 +0200 @@ -0,0 +1,62 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. +dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. +dnl Copyright (c) 2014-2021 Intel, Inc. All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +dnl +dnl This file derived from config/opal_strip_optflags.m4 in Open MPI. +dnl +dnl Example Usage: +dnl FI_STRIP_OPTFLAGS($CFLAGS) +dnl CFLAGS_WITHOUT_OPTFLAGS="$s_result" + +AC_DEFUN([FI_STRIP_OPTFLAGS],[ + +# Process a set of flags and remove all debugging and optimization +# flags + +s_arg="$1" +s_result= +for s_word in $s_arg; do + # See http://www.gnu.org/software/autoconf/manual/html_node/Quadrigraphs.html#Quadrigraphs + # for an explanation of @<:@ and @:>@ -- they m4 expand to [ and ] + case $s_word in + -g) ;; + -g@<:@1-3@:>@) ;; + +K@<:@0-5@:>@) ;; + -O) ;; + -O@<:@0-9@:>@) ;; + -xO) ;; + -xO@<:@0-9@:>@) ;; + -fast) ;; + -finline-functions) ;; + + # The below Sun Studio flags require or + # trigger -xO optimization + -xvector*) ;; + -xdepend=yes) ;; + + *) s_result="$s_result $s_word" + esac +done + +# Clean up + +unset s_word s_arg]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/configure.ac new/libfabric-1.12.1.0.08c7a6af92d3/configure.ac --- old/libfabric-1.12.0.0.b5c35d115b31/configure.ac 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/configure.ac 2021-04-01 23:01:34.000000000 +0200 @@ -7,7 +7,7 @@ dnl Process this file with autoconf to produce a configure script. AC_PREREQ([2.60]) -AC_INIT([libfabric], [1.12.0], [of...@lists.openfabrics.org]) +AC_INIT([libfabric], [1.12.1], [of...@lists.openfabrics.org]) AC_CONFIG_SRCDIR([src/fabric.c]) AC_CONFIG_AUX_DIR(config) AC_CONFIG_MACRO_DIR(config) @@ -15,6 +15,7 @@ AM_INIT_AUTOMAKE([1.11 dist-bzip2 foreign -Wall -Werror subdir-objects parallel-tests tar-pax]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) m4_include(config/fi_check_package.m4) +m4_include(config/fi_strip_optflags.m4) AC_CANONICAL_HOST diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/fabtests/configure.ac new/libfabric-1.12.1.0.08c7a6af92d3/fabtests/configure.ac --- old/libfabric-1.12.0.0.b5c35d115b31/fabtests/configure.ac 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/fabtests/configure.ac 2021-04-01 23:01:34.000000000 +0200 @@ -5,7 +5,7 @@ dnl Process this file with autoconf to produce a configure script. AC_PREREQ(2.57) -AC_INIT([fabtests], [1.12.0], [of...@lists.openfabrics.org]) +AC_INIT([fabtests], [1.12.1], [of...@lists.openfabrics.org]) AC_CONFIG_AUX_DIR(config) AC_CONFIG_MACRO_DIR(config) AC_CONFIG_HEADERS(config.h) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/include/ofi_hmem.h new/libfabric-1.12.1.0.08c7a6af92d3/include/ofi_hmem.h --- old/libfabric-1.12.0.0.b5c35d115b31/include/ofi_hmem.h 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/include/ofi_hmem.h 2021-04-01 23:01:34.000000000 +0200 @@ -88,6 +88,25 @@ #endif /* HAVE_ROCR */ +struct ofi_hmem_ops { + bool initialized; + int (*init)(void); + int (*cleanup)(void); + int (*copy_to_hmem)(uint64_t device, void *dest, const void *src, + size_t size); + int (*copy_from_hmem)(uint64_t device, void *dest, const void *src, + size_t size); + bool (*is_addr_valid)(const void *addr); + int (*get_handle)(void *dev_buf, void **handle); + int (*open_handle)(void **handle, uint64_t device, void **ipc_ptr); + int (*close_handle)(void *ipc_ptr); + int (*host_register)(void *ptr, size_t size); + int (*host_unregister)(void *ptr); + int (*get_base_addr)(const void *ptr, void **base); +}; + +extern struct ofi_hmem_ops hmem_ops[]; + int rocr_copy_from_dev(uint64_t device, void *dest, const void *src, size_t size); int rocr_copy_to_dev(uint64_t device, void *dest, const void *src, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/include/rdma/fabric.h new/libfabric-1.12.1.0.08c7a6af92d3/include/rdma/fabric.h --- old/libfabric-1.12.0.0.b5c35d115b31/include/rdma/fabric.h 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/include/rdma/fabric.h 2021-04-01 23:01:34.000000000 +0200 @@ -80,7 +80,7 @@ #define FI_MAJOR_VERSION 1 #define FI_MINOR_VERSION 12 -#define FI_REVISION_VERSION 0 +#define FI_REVISION_VERSION 1 enum { FI_PATH_MAX = 256, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/include/windows/config.h new/libfabric-1.12.1.0.08c7a6af92d3/include/windows/config.h --- old/libfabric-1.12.0.0.b5c35d115b31/include/windows/config.h 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/include/windows/config.h 2021-04-01 23:01:34.000000000 +0200 @@ -165,7 +165,7 @@ #define PACKAGE_TARNAME PACKAGE /* Define to the version of this package. */ -#define PACKAGE_VERSION "1.12.0" +#define PACKAGE_VERSION "1.12.1" /* Define to the full name and version of this package. */ #define PACKAGE_STRING PACKAGE_NAME " " PACKAGE_VERSION diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/efa_av.c new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/efa_av.c --- old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/efa_av.c 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/efa_av.c 2021-04-01 23:01:34.000000000 +0200 @@ -339,6 +339,7 @@ util_ep = container_of(ep_list_entry, struct util_ep, av_entry); rxr_ep = container_of(util_ep, struct rxr_ep, util_ep); peer = rxr_ep_get_peer(rxr_ep, *fi_addr); + peer->efa_fiaddr = *fi_addr; peer->is_self = efa_is_same_addr((struct efa_ep_addr *)rxr_ep->core_addr, addr); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/efa_domain.c new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/efa_domain.c --- old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/efa_domain.c 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/efa_domain.c 2021-04-01 23:01:34.000000000 +0200 @@ -213,6 +213,7 @@ int ret; struct ofi_mem_monitor *memory_monitors[OFI_HMEM_MAX] = { [FI_HMEM_SYSTEM] = uffd_monitor, + [FI_HMEM_CUDA] = cuda_monitor, }; fi = efa_get_efa_info(info->domain_attr->name); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/efa_mr.c new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/efa_mr.c --- old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/efa_mr.c 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/efa_mr.c 2021-04-01 23:01:34.000000000 +0200 @@ -151,7 +151,6 @@ struct efa_mr *efa_mr; struct ofi_mr_entry *entry; int ret; - static const int EFA_MR_CACHE_FLUSH_CHECK = 512; if (flags & OFI_MR_NOCACHE) { ret = efa_mr_regattr(fid, attr, flags, mr_fid); @@ -167,10 +166,6 @@ domain = container_of(fid, struct efa_domain, util_domain.domain_fid.fid); - if (domain->cache->cached_cnt > 0 && domain->cache->cached_cnt % EFA_MR_CACHE_FLUSH_CHECK==0) { - ofi_mr_cache_flush(domain->cache, false); - } - ret = ofi_mr_cache_search(domain->cache, attr, &entry); if (OFI_UNLIKELY(ret)) return ret; @@ -307,6 +302,9 @@ if (efa_mr->domain->ctx->device_caps & EFADV_DEVICE_ATTR_CAPS_RDMA_READ) fi_ibv_access |= IBV_ACCESS_REMOTE_READ; + if (efa_mr->domain->cache) + ofi_mr_cache_flush(efa_mr->domain->cache, false); + efa_mr->ibv_mr = ibv_reg_mr(efa_mr->domain->ibv_pd, (void *)mr_attr->mr_iov->iov_base, mr_attr->mr_iov->iov_len, fi_ibv_access); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/rxr/rxr.h new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/rxr/rxr.h --- old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/rxr/rxr.h 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/rxr/rxr.h 2021-04-01 23:01:34.000000000 +0200 @@ -282,7 +282,7 @@ }; #define RXR_PEER_REQ_SENT BIT_ULL(0) /* sent a REQ to the peer, peer should send a handshake back */ -#define RXR_PEER_HANDSHAKE_SENT BIT_ULL(1) +#define RXR_PEER_HANDSHAKE_SENT_OR_QUEUED BIT_ULL(1) #define RXR_PEER_HANDSHAKE_RECEIVED BIT_ULL(2) #define RXR_PEER_IN_BACKOFF BIT_ULL(3) /* peer is in backoff, not allowed to send */ #define RXR_PEER_BACKED_OFF BIT_ULL(4) /* peer backoff was increased during this loop of the progress engine */ @@ -303,6 +303,7 @@ bool rx_init; /* tracks initialization of rx state */ bool is_self; /* self flag */ bool is_local; /* local/remote peer flag */ + fi_addr_t efa_fiaddr; /* fi_addr_t addr from efa provider */ fi_addr_t shm_fiaddr; /* fi_addr_t addr from shm provider */ struct rxr_robuf *robuf; /* tracks expected msg_id on rx */ uint32_t next_msg_id; /* sender's view of msg_id */ @@ -317,6 +318,7 @@ int timeout_interval; /* initial RNR timeout value */ int rnr_timeout_exp; /* RNR timeout exponentation calc val */ struct dlist_entry rnr_entry; /* linked to rxr_ep peer_backoff_list */ + struct dlist_entry queued_entry; /* linked with peer_queued_list in rxr_ep */ }; struct rxr_queued_ctrl_info { @@ -651,6 +653,8 @@ struct dlist_entry read_pending_list; /* rxr_peer entries that are in backoff due to RNR */ struct dlist_entry peer_backoff_list; + /* rxr_peer entries that will retry posting handshake pkt */ + struct dlist_entry peer_queued_list; #if ENABLE_DEBUG /* rx_entries waiting for data to arrive (large messages) */ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/rxr/rxr_domain.c new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/rxr/rxr_domain.c --- old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/rxr/rxr_domain.c 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/rxr/rxr_domain.c 2021-04-01 23:01:34.000000000 +0200 @@ -97,9 +97,6 @@ rxr_domain = container_of(domain_fid, struct rxr_domain, util_domain.domain_fid.fid); - if (attr->iface == FI_HMEM_CUDA) - flags |= OFI_MR_NOCACHE; - ret = fi_mr_regattr(rxr_domain->rdm_domain, attr, flags, mr); if (ret) { FI_WARN(&rxr_prov, FI_LOG_MR, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/rxr/rxr_ep.c new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/rxr/rxr_ep.c --- old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/rxr/rxr_ep.c 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/rxr/rxr_ep.c 2021-04-01 23:01:34.000000000 +0200 @@ -1354,6 +1354,7 @@ dlist_init(&ep->tx_pending_list); dlist_init(&ep->read_pending_list); dlist_init(&ep->peer_backoff_list); + dlist_init(&ep->peer_queued_list); #if ENABLE_DEBUG dlist_init(&ep->rx_pending_list); dlist_init(&ep->rx_pkt_list); @@ -1613,6 +1614,23 @@ rxr_ep_check_peer_backoff_timer(ep); /* + * Resend handshake packet for any peers where the first + * handshake send failed. + */ + dlist_foreach_container_safe(&ep->peer_queued_list, + struct rxr_peer, peer, + queued_entry, tmp) { + + ret = rxr_pkt_post_handshake(ep, peer); + if (ret == -FI_EAGAIN) + break; + if (OFI_UNLIKELY(ret)) + goto handshake_err; + + dlist_remove(&peer->queued_entry); + } + + /* * Send any queued ctrl packets. */ dlist_foreach_container_safe(&ep->rx_entry_queued_list, @@ -1780,6 +1798,14 @@ assert(0 && "error writing err cq entry while handling RDMA error"); return; + +handshake_err: + FI_WARN(&rxr_prov, FI_LOG_EP_CTRL, + "Failed to post HANDSHAKE to peer %ld: %s\n", + peer->efa_fiaddr, fi_strerror(-ret)); + assert(0 && "Failed to post HANDSHAKE to peer"); + efa_eq_write_error(&ep->util_ep, FI_EIO, -ret); + return; } void rxr_ep_progress(struct util_ep *util_ep) @@ -1812,6 +1838,23 @@ && !(info->caps & FI_LOCAL_COMM)) return 0; + /* + * Currently, shm provider uses the SAR protocol for cuda + * memory buffer, whose performance is worse than using EFA device. + * + * To address this issue, shm usage is disabled if application + * requested the FI_HMEM capablity. + * + * This is not ideal, because host memory commuications are + * also going through device. + * + * The long term fix is make shm provider to support cuda + * buffers through cuda IPC. Once that is implemented, the + * following two lines need to be removed. + */ + if (info && (info->caps & FI_HMEM)) + return 0; + return rxr_env.enable_shm_transfer; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/rxr/rxr_init.c new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/rxr/rxr_init.c --- old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/rxr/rxr_init.c 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/rxr/rxr_init.c 2021-04-01 23:01:34.000000000 +0200 @@ -405,17 +405,6 @@ * which means FI_MR_HMEM implies FI_MR_LOCAL for cuda buffer */ if (hints->caps & FI_HMEM) { - /* - * XXX: remove this once CUDA IPC is supported by SHM - * and we have a fallback path to use the device when - * SHM doesn't support CUDA IPC. - */ - if (hints->caps & FI_LOCAL_COMM) { - FI_WARN(&rxr_prov, FI_LOG_CORE, - "FI_HMEM is currently not supported by the EFA provider when FI_LOCAL_COMM is requested.\n"); - return -FI_ENODATA; - } - info->caps &= ~FI_LOCAL_COMM; if (!efa_device_support_rdma_read()) { FI_WARN(&rxr_prov, FI_LOG_CORE, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/rxr/rxr_pkt_cmd.c new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/rxr/rxr_pkt_cmd.c --- old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/rxr/rxr_pkt_cmd.c 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/rxr/rxr_pkt_cmd.c 2021-04-01 23:01:34.000000000 +0200 @@ -867,8 +867,8 @@ #endif #endif peer = rxr_ep_get_peer(ep, pkt_entry->addr); - if (!(peer->flags & RXR_PEER_HANDSHAKE_SENT)) - rxr_pkt_post_handshake(ep, peer, pkt_entry->addr); + if (!(peer->flags & RXR_PEER_HANDSHAKE_SENT_OR_QUEUED)) + rxr_pkt_post_handshake_or_queue(ep, peer); if (peer->is_local) { assert(ep->use_shm); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/rxr/rxr_pkt_type.h new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/rxr/rxr_pkt_type.h --- old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/rxr/rxr_pkt_type.h 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/rxr/rxr_pkt_type.h 2021-04-01 23:01:34.000000000 +0200 @@ -157,9 +157,10 @@ struct rxr_pkt_entry *pkt_entry, fi_addr_t addr); -void rxr_pkt_post_handshake(struct rxr_ep *ep, - struct rxr_peer *peer, - fi_addr_t addr); +ssize_t rxr_pkt_post_handshake(struct rxr_ep *ep, struct rxr_peer *peer); + +void rxr_pkt_post_handshake_or_queue(struct rxr_ep *ep, + struct rxr_peer *peer); void rxr_pkt_handle_handshake_recv(struct rxr_ep *ep, struct rxr_pkt_entry *pkt_entry); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/rxr/rxr_pkt_type_misc.c new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/rxr/rxr_pkt_type_misc.c --- old/libfabric-1.12.0.0.b5c35d115b31/prov/efa/src/rxr/rxr_pkt_type_misc.c 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/efa/src/rxr/rxr_pkt_type_misc.c 2021-04-01 23:01:34.000000000 +0200 @@ -67,41 +67,69 @@ return 0; } -void rxr_pkt_post_handshake(struct rxr_ep *ep, - struct rxr_peer *peer, - fi_addr_t addr) +/** @brief Post a handshake packet to a peer. + * + * @param ep The endpoint on which the handshake packet is sent out. + * @param peer The peer to which the handshake packet is posted. + * @return 0 on success, fi_errno on error. + */ +ssize_t rxr_pkt_post_handshake(struct rxr_ep *ep, struct rxr_peer *peer) { struct rxr_pkt_entry *pkt_entry; + fi_addr_t addr; ssize_t ret; - assert(!(peer->flags & RXR_PEER_HANDSHAKE_SENT)); - + addr = peer->efa_fiaddr; pkt_entry = rxr_pkt_entry_alloc(ep, ep->tx_pkt_efa_pool); if (OFI_UNLIKELY(!pkt_entry)) - return; + return -FI_EAGAIN; rxr_pkt_init_handshake(ep, pkt_entry, addr); - /* - * TODO: Once we start using a core's selective completion capability, - * post the HANDSHAKE packets without FI_COMPLETION. - */ ret = rxr_pkt_entry_send(ep, pkt_entry, addr); - - /* - * Skip sending this handshake on error and try again when processing the - * next REQ from this peer containing the source information - */ if (OFI_UNLIKELY(ret)) { rxr_pkt_entry_release_tx(ep, pkt_entry); - if (ret == -FI_EAGAIN) - return; - FI_WARN(&rxr_prov, FI_LOG_CQ, - "Failed to send a HANDSHAKE packet: ret %zd\n", ret); - return; } + return ret; +} - peer->flags |= RXR_PEER_HANDSHAKE_SENT; +/** @brief Post a handshake packet to a peer. + * + * Note that if FI_EAGAIN is returned from the post of the handshake + * packet, the peer will be added into queue_peer_list for retry + * later. For other errors, we will hard fail. + * + * @param ep The endpoint on which the handshake packet is sent out. + * @param peer The peer to which the handshake packet is posted. + * @return Void. + */ +void rxr_pkt_post_handshake_or_queue(struct rxr_ep *ep, struct rxr_peer *peer) +{ + ssize_t ret; + + assert(!(peer->flags & RXR_PEER_HANDSHAKE_SENT_OR_QUEUED)); + + ret = rxr_pkt_post_handshake(ep, peer); + if (OFI_UNLIKELY(ret)) { + if (ret == -FI_EAGAIN) { + /* add peer to peer_queued_list for retry later */ + dlist_insert_tail(&peer->queued_entry, + &ep->peer_queued_list); + } else { + FI_WARN(&rxr_prov, FI_LOG_EP_CTRL, + "Failed to post HANDSHAKE to peer %ld: %s\n", + peer->efa_fiaddr, fi_strerror(-ret)); + assert(0 && "Failed to post HANDSHAKE to peer"); + efa_eq_write_error(&ep->util_ep, FI_EIO, -ret); + } + } + /* + * If rxr_pkt_post_handshake returns success or FI_EAGAIN, + * set the flag to RXR_PEER_HANDSHAKE_SENT_OR_QUEUED to + * avoid posting handshake packet multiple times to the + * same peer. + */ + peer->flags |= RXR_PEER_HANDSHAKE_SENT_OR_QUEUED; } void rxr_pkt_handle_handshake_recv(struct rxr_ep *ep, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/psm3/Makefile.include new/libfabric-1.12.1.0.08c7a6af92d3/prov/psm3/Makefile.include --- old/libfabric-1.12.0.0.b5c35d115b31/prov/psm3/Makefile.include 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/psm3/Makefile.include 2021-04-01 23:01:34.000000000 +0200 @@ -29,7 +29,7 @@ chksum_srcs = $(_psm3_files) if HAVE_PSM3_SRC -_psm3_cflags = -mavx2 +_psm3_cflags = #include prov/psm3/psm3/Makefile.include _nodist_psm3_files = \ prov/psm3/src/psm3_revision.c @@ -265,31 +265,32 @@ libptl_self.la \ libpsm_hal_gen1.la -EXTRA_DIST += \ +_psm3_extra_dist = \ prov/psm3/psm3/include/rbtree.c \ prov/psm3/psm3/psm_hal_gen1/psm_hal_gen1_spio.c \ prov/psm3/psm3/opa/opa_dwordcpy-x86_64-fast.S +EXTRA_DIST += $(_psm3_extra_dist) chksum_srcs += \ $(libptl_am_la_SOURCES) $(libptl_ips_la_SOURCES) $(libptl_self_la_SOURCES) \ $(libuuid_la_SOURCES) $(libopa_la_SOURCES) $(libpsm_hal_gen1_la_SOURCES) \ - $(libpsm3i_la_SOURCES) $(EXTRA_DIST) + $(libpsm3i_la_SOURCES) $(_psm3_extra_dist) _psm3_LIBS = libpsm3i.la libpsm3_la_DEPENDENCIES = libpsm3i.la all-local: @echo "Building src checksum..."; \ - chksum=`cat $(chksum_srcs) | sha1sum | cut -d' ' -f 1`; \ - if ! grep -q $$chksum prov/psm3/src/psm3_revision.c 2>/dev/null; then \ - sed -i "/define PSMX3_SRC_CHECKSUM/s/\".*\"/\"$$chksum\"/" prov/psm3/src/psm3_revision.c; \ + chksum=`for file in $(chksum_srcs); do cat $(top_srcdir)/$$file; done | sha1sum | cut -d' ' -f 1`; \ + if ! grep -q $$chksum $(top_builddir)/prov/psm3/src/psm3_revision.c 2>/dev/null; then \ + sed -i "/define PSMX3_SRC_CHECKSUM/s/\".*\"/\"$$chksum\"/" $(top_builddir)/prov/psm3/src/psm3_revision.c; \ echo "SRC checksum updated to $$chksum"; \ + timestamp=`date`; \ + sed -i "/define PSMX3_BUILD_TIMESTAMP/s/\".*\"/\"$$timestamp\"/" $(top_builddir)/prov/psm3/src/psm3_revision.c; \ + echo "Updated build timestamp: $$timestamp"; \ else \ echo "SRC checksum not changed: $$chksum"; \ - fi; \ - timestamp=`date`; \ - sed -i "/define PSMX3_BUILD_TIMESTAMP/s/\".*\"/\"$$timestamp\"/" prov/psm3/src/psm3_revision.c; \ - echo "Updated build timestamp: $$timestamp" + fi endif HAVE_PSM3_SRC @@ -315,6 +316,7 @@ src_libfabric_la_LIBADD += libpsm3.la src_libfabric_la_DEPENDENCIES += libpsm3.la +if HAVE_PSM2_SRC .libs/libpsm3_full.lo: $(libpsm3_la_OBJECTS) $(libpsm3_la_DEPENDENCIES) $(EXTRA_libpsm3_la_DEPENDENCIES) @sed -i.bak "/dependency_libs/s/='.*'/=''/" libpsm3i.la $(AM_V_CCLD)$(libpsm3_la_LINK) -r $(am_libpsm3_la_rpath) $(libpsm3_la_OBJECTS) libpsm3i.la @@ -326,7 +328,9 @@ @mv libpsm3i.la.bak libpsm3i.la $(AM_V_CCLD)$(libpsm3_la_LINK) $(am_libpsm3_la_rpath) $(libpsm3_la_OBJECTS) $(libpsm3_la_LIBADD) $(LIBS); \ rm -f .libs/libpsm3.a libpsm3.a; \ - ar cru .libs/libpsm3.a .libs/libpsm3_exp.o + $(AR) cru .libs/libpsm3.a .libs/libpsm3_exp.o; \ + $(RANLIB) .libs/libpsm3.a +endif HAVE_PSM2_SRC endif !HAVE_PSM3_DL diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/psm3/configure.m4 new/libfabric-1.12.1.0.08c7a6af92d3/prov/psm3/configure.m4 --- old/libfabric-1.12.0.0.b5c35d115b31/prov/psm3/configure.m4 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/psm3/configure.m4 2021-04-01 23:01:34.000000000 +0200 @@ -65,18 +65,22 @@ [psm3_happy=0]) AC_MSG_CHECKING([for -msse4.2 support]) + + dnl Strip other optflags to avoid conflicts when checking for instruction sets + FI_STRIP_OPTFLAGS($CFLAGS) + PSM3_STRIP_OPTFLAGS="$s_result" + save_CFLAGS=$CFLAGS - CFLAGS="$CFLAGS -msse4.2" + CFLAGS="$PSM3_STRIP_OPTFLAGS -msse4.2 -O0" AC_LINK_IFELSE( [AC_LANG_PROGRAM( - [#include <nmmintrin.h>], - [unsigned int crc = 0; - crc = _mm_crc32_u32(crc, 0); - return crc == 0;]) + [[#include <nmmintrin.h>]], + [[unsigned int crc = 0; + crc = _mm_crc32_u32(crc, 0); + return crc == 0;]]) ],[ AC_MSG_RESULT([yes]) - psm3_crc_happy=1 - ARCH_CFLAGS="-msse4.2" + PSM3_ARCH_CFLAGS="-msse4.2" ],[ psm3_happy=0 AC_MSG_RESULT([no]) @@ -86,18 +90,18 @@ AC_MSG_CHECKING([for -mavx support]) save_CFLAGS=$CFLAGS - CFLAGS="$CFLAGS -mavx" + CFLAGS="$PSM3_STRIP_OPTFLAGS -mavx -O0" AC_LINK_IFELSE( [AC_LANG_PROGRAM( - [#include <immintrin.h>], - [unsigned long long *vec_a = {1,2,3,4}; - __m256i *sp = (__m256i *)vec_a; - __m256i vec = _mm256_load_si256(sp); - return 0;]) + [[#include <immintrin.h>]], + [[unsigned long long _a[4] = {1ULL,2ULL,3ULL,4ULL}; + __m256i vA = _mm256_loadu_si256((__m256i *)_a); + __m256i vB; + _mm256_store_si256(&vB, vA); + return 0;]]) ],[ AC_MSG_RESULT([yes]) - psm3_256_happy=1 - ARCH_CFLAGS="-mavx" + PSM3_ARCH_CFLAGS="-mavx" ],[ psm3_happy=0 AC_MSG_RESULT([no]) @@ -105,6 +109,25 @@ ]) CFLAGS=$save_CFLAGS + AC_MSG_CHECKING([for -mavx2 support]) + save_CFLAGS=$CFLAGS + CFLAGS="$PSM3_STRIP_OPTFLAGS -mavx2 -O0" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [[#include <immintrin.h>]], + [[unsigned long long _a[4] = {1ULL,2ULL,3ULL,4ULL}; + __m256i vA = _mm256_loadu_si256((__m256i *)_a); + __m256i vB = _mm256_add_epi64(vA, vA); + (void)vB; + return 0;]]) + ],[ + AC_MSG_RESULT([yes]) + PSM3_ARCH_CFLAGS="-mavx2" + ],[ + AC_MSG_RESULT([no]) + ]) + CFLAGS=$save_CFLAGS + AS_IF([test x$with_psm3_rv = xno], [psm3_CPPFLAGS="$psm3_CPPFLAGS -URNDV_MOD"], [ @@ -153,8 +176,11 @@ [AC_LANG_PROGRAM( [[#include <sys/types.h> #include <stdint.h> - #include <rdma/rv_user_ioctls.h> - ]],[[struct rv_ring_header ring; ring.overflow_cnt=0;]]) + #include <rdma/rv_user_ioctls.h>]], + [[struct rv_ring_header ring; + ring.overflow_cnt=0; + (void)ring; + return 0;]]) ],[ AC_MSG_RESULT(yes) ],[ @@ -172,7 +198,7 @@ AS_IF([test $psm3_happy -eq 1], [$1], [$2]) - psm3_CFLAGS="$ARCH_CFLAGS" + psm3_CFLAGS="$PSM3_ARCH_CFLAGS" psm3_CPPFLAGS="$psm3_CPPFLAGS $psm3_rt_CPPFLAGS $psm3_dl_CPPFLAGS $psm3_numa_CPPFLAGS $psm3_ibv_CPPFLAGS" psm3_LDFLAGS="$psm3_LDFLAGS $psm3_rt_LDFLAGS $psm3_dl_LDFLAGS $psm3_numa_LDFLAGS $psm3_ibv_LDFLAGS" psm3_LIBS="$psm3_LIBS $psm3_rt_LIBS $psm3_dl_LIBS $psm3_numa_LIBS $psm3_ibv_LIBS" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/psm3/psm3/include/opa_user.h new/libfabric-1.12.1.0.08c7a6af92d3/prov/psm3/psm3/include/opa_user.h --- old/libfabric-1.12.0.0.b5c35d115b31/prov/psm3/psm3/include/opa_user.h 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/psm3/psm3/include/opa_user.h 2021-04-01 23:01:34.000000000 +0200 @@ -82,6 +82,11 @@ #include "opa_udebug.h" #include "opa_service.h" +#ifndef PACK_SUFFIX +/* XXX gcc only */ +#define PACK_SUFFIX __attribute__((packed)) +#endif + #define HFI_TF_NFLOWS 32 // The sender uses an RDMA Write with Immediate. The immediate data diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/psm3/psm3/psm_rndv_mod.c new/libfabric-1.12.1.0.08c7a6af92d3/prov/psm3/psm3/psm_rndv_mod.c --- old/libfabric-1.12.0.0.b5c35d115b31/prov/psm3/psm3/psm_rndv_mod.c 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/psm3/psm3/psm_rndv_mod.c 2021-04-01 23:01:34.000000000 +0200 @@ -65,8 +65,6 @@ #include <ctype.h> /* isalpha */ //#include <netdb.h> #include <infiniband/verbs.h> -//#include <infiniband/ib.h> // for AF_IB structures -//#include <rdma/rdma_verbs.h> #include "psm_user.h" // get psmi_calloc and free #include "psm_rndv_mod.h" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/psm3/psm3/psm_utils.c new/libfabric-1.12.1.0.08c7a6af92d3/prov/psm3/psm3/psm_utils.c --- old/libfabric-1.12.0.0.b5c35d115b31/prov/psm3/psm3/psm_utils.c 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/psm3/psm3/psm_utils.c 2021-04-01 23:01:34.000000000 +0200 @@ -59,7 +59,6 @@ #include "psm_mq_internal.h" #include "ips_proto_params.h" #include <netinet/in.h> // for sockaddr -#include <infiniband/ib.h> // for AF_IB structures #include <fnmatch.h> @@ -417,7 +416,6 @@ // superset of inet_ntop. For AF_INET and AF_INET6 outputs address and port -// for AF_IB outputs address sid and pkey const char *psmi_sockaddr_ntop(struct sockaddr* addr, char *dst, socklen_t size) { if (! dst || size < PSM_ADDRSTRLEN) { @@ -449,15 +447,6 @@ snprintf(dst+strlen(dst), size-strlen(dst), " %u", be16toh(in_addr->sin6_port)); return dst; } - case AF_IB: - { - struct sockaddr_ib* ib_addr = ((struct sockaddr_ib*)addr); - // we show the GID sid and pkey. - // Could also output sid_mask and sib_scope_id - inet_ntop(AF_INET6, &ib_addr->sib_addr, dst, size); - snprintf(dst+strlen(dst), size-strlen(dst), " 0x%016"PRIx64" 0x%04"PRIx16, be64toh(ib_addr->sib_sid), be16toh(ib_addr->sib_pkey)); - return dst; - } default: snprintf(dst, size, "Unsupported"); return dst; @@ -497,8 +486,6 @@ return (sizeof(struct sockaddr_in)); case AF_INET6: return (sizeof(struct sockaddr_in6)); - case AF_IB: - return (sizeof(struct sockaddr_ib)); default: // unknown return 0; // be conservative diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/psm3/psm3/psm_verbs_ep.c new/libfabric-1.12.1.0.08c7a6af92d3/prov/psm3/psm3/psm_verbs_ep.c --- old/libfabric-1.12.0.0.b5c35d115b31/prov/psm3/psm3/psm_verbs_ep.c 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/psm3/psm3/psm_verbs_ep.c 2021-04-01 23:01:34.000000000 +0200 @@ -59,7 +59,6 @@ #include <sched.h> /* cpu_set */ #include <ctype.h> /* isalpha */ #include <netdb.h> -#include <infiniband/ib.h> // for AF_IB #include <ifaddrs.h> #include <sys/socket.h> #include <netinet/in.h> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/prov/util/src/util_mem_monitor.c new/libfabric-1.12.1.0.08c7a6af92d3/prov/util/src/util_mem_monitor.c --- old/libfabric-1.12.0.0.b5c35d115b31/prov/util/src/util_mem_monitor.c 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/prov/util/src/util_mem_monitor.c 2021-04-01 23:01:34.000000000 +0200 @@ -36,6 +36,7 @@ #include <ofi_mr.h> #include <unistd.h> +#include "ofi_hmem.h" pthread_mutex_t mm_lock = PTHREAD_MUTEX_INITIALIZER; pthread_rwlock_t mm_list_rwlock = PTHREAD_RWLOCK_INITIALIZER; @@ -208,6 +209,9 @@ for (iface = FI_HMEM_SYSTEM; iface < OFI_HMEM_MAX; iface++) { cache->monitors[iface] = NULL; + if (!hmem_ops[iface].initialized) + continue; + monitor = monitors[iface]; if (!monitor) { FI_DBG(&core_prov, FI_LOG_MR, @@ -218,9 +222,7 @@ if (dlist_empty(&monitor->list)) { ret = monitor->start(monitor); - if (ret == -FI_ENOSYS) - continue; - else if (ret) + if (ret) goto err; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/src/fabric.c new/libfabric-1.12.1.0.08c7a6af92d3/src/fabric.c --- old/libfabric-1.12.0.0.b5c35d115b31/src/fabric.c 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/src/fabric.c 2021-04-01 23:01:34.000000000 +0200 @@ -362,8 +362,8 @@ static void ofi_ordered_provs_init(void) { char *ordered_prov_names[] = { - "psm3", "psm2", "psm", "efa", "usnic", "gni", "bgq", "verbs", - "netdir", "ofi_rxm", "ofi_rxd", "shm", + "efa", "psm2", "psm", "usnic", "gni", "bgq", "verbs", + "netdir", "psm3", "ofi_rxm", "ofi_rxd", "shm", /* Initialize the socket based providers last of the * standard providers. This will result in them being * the least preferred providers. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/src/hmem.c new/libfabric-1.12.1.0.08c7a6af92d3/src/hmem.c --- old/libfabric-1.12.0.0.b5c35d115b31/src/hmem.c 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/src/hmem.c 2021-04-01 23:01:34.000000000 +0200 @@ -39,24 +39,7 @@ #include "ofi.h" #include "ofi_iov.h" -struct ofi_hmem_ops { - bool initialized; - int (*init)(void); - int (*cleanup)(void); - int (*copy_to_hmem)(uint64_t device, void *dest, const void *src, - size_t size); - int (*copy_from_hmem)(uint64_t device, void *dest, const void *src, - size_t size); - bool (*is_addr_valid)(const void *addr); - int (*get_handle)(void *dev_buf, void **handle); - int (*open_handle)(void **handle, uint64_t device, void **ipc_ptr); - int (*close_handle)(void *ipc_ptr); - int (*host_register)(void *ptr, size_t size); - int (*host_unregister)(void *ptr); - int (*get_base_addr)(const void *ptr, void **base); -}; - -static struct ofi_hmem_ops hmem_ops[] = { +struct ofi_hmem_ops hmem_ops[] = { [FI_HMEM_SYSTEM] = { .initialized = false, .init = ofi_hmem_init_noop, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.12.0.0.b5c35d115b31/src/hmem_cuda.c new/libfabric-1.12.1.0.08c7a6af92d3/src/hmem_cuda.c --- old/libfabric-1.12.0.0.b5c35d115b31/src/hmem_cuda.c 2021-03-08 21:10:06.000000000 +0100 +++ new/libfabric-1.12.1.0.08c7a6af92d3/src/hmem_cuda.c 2021-04-01 23:01:34.000000000 +0200 @@ -300,6 +300,7 @@ int cuda_hmem_init(void) { int ret; + int gdrcopy_ret; ret = cuda_hmem_dl_init(); if (ret != FI_SUCCESS) @@ -309,8 +310,8 @@ if (ret != FI_SUCCESS) goto dl_cleanup; - ret = cuda_gdrcopy_hmem_init(); - if (ret == FI_SUCCESS) { + gdrcopy_ret = cuda_gdrcopy_hmem_init(); + if (gdrcopy_ret == FI_SUCCESS) { hmem_cuda_use_gdrcopy = 1; fi_param_define(NULL, "hmem_cuda_use_gdrcopy", FI_PARAM_BOOL, "Use gdrcopy to copy data to/from GPU memory"); @@ -318,7 +319,7 @@ &hmem_cuda_use_gdrcopy); } else { hmem_cuda_use_gdrcopy = 0; - if (ret != -FI_ENOSYS) + if (gdrcopy_ret != -FI_ENOSYS) FI_WARN(&core_prov, FI_LOG_CORE, "gdrcopy initialization failed! gdrcopy will not be used.\n"); } @@ -334,7 +335,8 @@ int cuda_hmem_cleanup(void) { cuda_hmem_dl_cleanup(); - cuda_gdrcopy_hmem_cleanup(); + if (hmem_cuda_use_gdrcopy) + cuda_gdrcopy_hmem_cleanup(); return FI_SUCCESS; }