Hello community, here is the log from the commit of package libfabric for openSUSE:Factory checked in at 2017-11-23 09:34:21 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/libfabric (Old) and /work/SRC/openSUSE:Factory/.libfabric.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "libfabric" Thu Nov 23 09:34:21 2017 rev:9 rq:544094 version:1.5.2 Changes: -------- --- /work/SRC/openSUSE:Factory/libfabric/libfabric.changes 2017-11-09 13:50:53.646203148 +0100 +++ /work/SRC/openSUSE:Factory/.libfabric.new/libfabric.changes 2017-11-23 09:34:24.374423539 +0100 @@ -1,0 +2,16 @@ +Mon Nov 20 16:27:13 UTC 2017 - nmoreychaisemar...@suse.com + +- Update to v1.5.2 + - Core + - Fix Power PC 32-bit build + - Sockets + - Fix incorrect reporting of counter attributes + - Verbs + - Fix reporting attributes based on device limits + - Fix incorrect CQ size reported for iWarp NICs + - Update man page with known issues for specific NICs + - Fix FI_RX_CQ_DATA mode check + - Disable on-demand paging by default (can cause data corruption) + - Disable loopback (localhost) addressing (causing failures in MPI) + +------------------------------------------------------------------- Old: ---- libfabric-1.5.1.0.476d147d.tar.bz2 New: ---- libfabric-1.5.2.0.480a6db3.tar.bz2 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ libfabric.spec ++++++ --- /var/tmp/diff_new_pack.cDhwfz/_old 2017-11-23 09:34:25.222392601 +0100 +++ /var/tmp/diff_new_pack.cDhwfz/_new 2017-11-23 09:34:25.222392601 +0100 @@ -17,13 +17,13 @@ # -%define git_ver .0.476d147d +%define git_ver .0.480a6db3 Name: libfabric Summary: User-space RDMA Fabric Interfaces License: GPL-2.0 or BSD-2-Clause Group: Development/Libraries/C and C++ -Version: 1.5.1 +Version: 1.5.2 Release: 0 Source: %{name}-%{version}%{git_ver}.tar.bz2 Source1: baselibs.conf ++++++ _service ++++++ --- /var/tmp/diff_new_pack.cDhwfz/_old 2017-11-23 09:34:25.254391432 +0100 +++ /var/tmp/diff_new_pack.cDhwfz/_new 2017-11-23 09:34:25.258391287 +0100 @@ -8,7 +8,7 @@ <param name="versionformat">@PARENT_TAG@.@TAG_OFFSET@.%h</param> <param name="versionrewrite-pattern">v(.*)</param> <param name="versionrewrite-replacement">\1</param> - <param name="revision">476d147da5a010faae571f6f46585c777a141474</param> + <param name="revision">480a6db351fbe8ee38077902c8df875e3cd13205</param> </service> <service name="recompress" mode="disabled"> <param name="file">libfabric*.tar</param> ++++++ libfabric-1.5.1.0.476d147d.tar.bz2 -> libfabric-1.5.2.0.480a6db3.tar.bz2 ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/.appveyor.yml new/libfabric-1.5.2.0.480a6db3/.appveyor.yml --- old/libfabric-1.5.1.0.476d147d/.appveyor.yml 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/.appveyor.yml 2017-11-08 22:12:47.000000000 +0100 @@ -17,6 +17,7 @@ before_test: - git clone https://github.com/ofiwg/fabtests - cd fabtests + - git checkout -b v1.5.x origin/v1.5.x - msbuild fabtests.sln test_script: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/.travis.yml new/libfabric-1.5.2.0.480a6db3/.travis.yml --- old/libfabric-1.5.1.0.476d147d/.travis.yml 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/.travis.yml 2017-11-08 22:12:47.000000000 +0100 @@ -70,6 +70,7 @@ script: - git clone https://github.com/ofiwg/fabtests.git - cd fabtests + - git checkout -b v1.5.x origin/v1.5.x - ./autogen.sh - ./configure --prefix=$PREFIX --with-libfabric=$PREFIX - make -j2 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/Makefile.am new/libfabric-1.5.2.0.480a6db3/Makefile.am --- old/libfabric-1.5.1.0.476d147d/Makefile.am 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/Makefile.am 2017-11-08 22:12:47.000000000 +0100 @@ -135,7 +135,7 @@ src_libfabric_la_DEPENDENCIES = libfabric.map if !EMBEDDED -src_libfabric_la_LDFLAGS += -version-info 10:1:9 +src_libfabric_la_LDFLAGS += -version-info 10:2:9 endif src_libfabric_la_LDFLAGS += -export-dynamic \ $(libfabric_version_script) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/NEWS.md new/libfabric-1.5.2.0.480a6db3/NEWS.md --- old/libfabric-1.5.1.0.476d147d/NEWS.md 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/NEWS.md 2017-11-08 22:12:47.000000000 +0100 @@ -5,6 +5,32 @@ bug fixes (and other actions) for each version of Libfabric since version 1.0. +v1.5.2, Wed Nov 8, 2017 +======================= + +## Core + +- Fix Power PC 32-bit build + +## RXM + +-- Remove dependency on shared receive contexts +-- Switch to automatic data progress +-- Fix removing addresses from AV + +## Sockets + +-- Fix incorrect reporting of counter attributes + +## Verbs + +-- Fix reporting attributes based on device limits +-- Fix incorrect CQ size reported for iWarp NICs +-- Update man page with known issues for specific NICs +-- Fix FI_RX_CQ_DATA mode check +-- Disable on-demand paging by default (can cause data corruption) +-- Disable loopback (localhost) addressing (causing failures in MPI) + v1.5.1, Wed Oct 4, 2017 ======================= diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/configure.ac new/libfabric-1.5.2.0.480a6db3/configure.ac --- old/libfabric-1.5.1.0.476d147d/configure.ac 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/configure.ac 2017-11-08 22:12:47.000000000 +0100 @@ -4,7 +4,7 @@ dnl Process this file with autoconf to produce a configure script. AC_PREREQ([2.60]) -AC_INIT([libfabric], [1.5.1], [of...@lists.openfabrics.org]) +AC_INIT([libfabric], [1.5.2], [of...@lists.openfabrics.org]) AC_CONFIG_SRCDIR([src/fabric.c]) AC_CONFIG_AUX_DIR(config) AC_CONFIG_MACRO_DIR(config) @@ -165,7 +165,11 @@ [int32_t a; __sync_add_and_fetch(&a, 0); __sync_sub_and_fetch(&a, 0); + #if defined(__PPC__) && !defined(__PPC64__) + #error compiler built-in atomics are not supported on PowerPC 32-bit + #else return 0; + #endif ], [ AC_MSG_RESULT(yes) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/man/fi_rxm.7.md new/libfabric-1.5.2.0.480a6db3/man/fi_rxm.7.md --- old/libfabric-1.5.1.0.476d147d/man/fi_rxm.7.md 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/man/fi_rxm.7.md 2017-11-08 22:12:47.000000000 +0100 @@ -85,11 +85,28 @@ * fi_cq_sread, fi_cq_sreadfrom and fi_cq_signal calls. +## Auto progress + +When sending large messages, an app doing an sread or waiting on the CQ file descriptor +may not get a completion when reading the CQ after being woken up from the wait. +The app has to do sread or wait on the file descriptor again. + ## Usage limitations RxM provider should work fine for client - server programs like fabtests. Support for MPI, SHMEM and other applications is work in progress. +## Known failures in ofiwg/fabtests + * fi_rdm_tagged_peek + +### HW Specific + +#### iWARP + * fi_poll -t queue + * fi_rma_bw -e rdm -o write + * fi_rma_bw -e rdm -o writedata + * fi_rdm_rma -o writedata + # RUNTIME PARAMETERS No runtime parameters are currently defined. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/man/fi_verbs.7.md new/libfabric-1.5.2.0.480a6db3/man/fi_verbs.7.md --- old/libfabric-1.5.1.0.476d147d/man/fi_verbs.7.md 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/man/fi_verbs.7.md 2017-11-08 22:12:47.000000000 +0100 @@ -191,6 +191,18 @@ memory leak in functions from dependent libraries (e.g. libibverbs, librdmacm). These leaks are safe to ignore. +## Known failures in ofiwg/fabtests + + * fi_rma_bw -o writedata -e rdm + +### HW Specific + +#### iWARP + + * fi_cm_data + * fi_rma_bw -e rdm -o read + * fi_rdm_rma -o writedata + # SEE ALSO [`fabric`(7)](fabric.7.html), diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/prov/rxm/src/rxm.h new/libfabric-1.5.2.0.480a6db3/prov/rxm/src/rxm.h --- old/libfabric-1.5.1.0.476d147d/prov/rxm/src/rxm.h 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/prov/rxm/src/rxm.h 2017-11-08 22:12:47.000000000 +0100 @@ -355,6 +355,8 @@ enum ofi_cmap_signal signal); int rxm_ep_repost_buf(struct rxm_rx_buf *buf); +int rxm_ep_prepost_buf(struct rxm_ep *rxm_ep, struct fid_ep *msg_ep); + int ofi_match_addr(fi_addr_t addr, fi_addr_t match_addr); int ofi_match_tag(uint64_t tag, uint64_t ignore, uint64_t match_tag); void rxm_pkt_init(struct rxm_pkt *pkt); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/prov/rxm/src/rxm_attr.c new/libfabric-1.5.2.0.480a6db3/prov/rxm/src/rxm_attr.c --- old/libfabric-1.5.1.0.476d147d/prov/rxm/src/rxm_attr.c 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/prov/rxm/src/rxm_attr.c 2017-11-08 22:12:47.000000000 +0100 @@ -36,16 +36,23 @@ FI_READ | FI_WRITE | FI_RECV | FI_SEND | \ FI_REMOTE_READ | FI_REMOTE_WRITE | FI_SOURCE) +/* Since we are a layering provider, the attributes for which we rely on the + * core provider are set to full capability. This ensures that ofix_getinfo + * check hints succeeds and the core provider can accept / reject any capability + * requested by the app. */ + struct fi_tx_attr rxm_tx_attr = { .caps = RXM_EP_CAPS, - .comp_order = FI_ORDER_STRICT, + .msg_order = ~0x0, + .comp_order = ~0x0, .size = SIZE_MAX, .iov_limit = RXM_IOV_LIMIT, }; struct fi_rx_attr rxm_rx_attr = { .caps = RXM_EP_CAPS, - .comp_order = FI_ORDER_STRICT, + .msg_order = ~0x0, + .comp_order = FI_ORDER_STRICT | FI_ORDER_DATA, .size = 1024, .iov_limit= RXM_IOV_LIMIT, }; @@ -62,7 +69,7 @@ struct fi_domain_attr rxm_domain_attr = { .threading = FI_THREAD_SAFE, .control_progress = FI_PROGRESS_AUTO, - .data_progress = FI_PROGRESS_MANUAL, + .data_progress = FI_PROGRESS_AUTO, .resource_mgmt = FI_RM_ENABLED, .av_type = FI_AV_UNSPEC, /* Advertise support for FI_MR_BASIC so that ofi_check_info call diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/prov/rxm/src/rxm_conn.c new/libfabric-1.5.2.0.480a6db3/prov/rxm/src/rxm_conn.c --- old/libfabric-1.5.1.0.476d147d/prov/rxm/src/rxm_conn.c 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/prov/rxm/src/rxm_conn.c 2017-11-08 22:12:47.000000000 +0100 @@ -58,10 +58,13 @@ goto err; } - ret = fi_ep_bind(msg_ep, &rxm_ep->srx_ctx->fid, 0); - if (ret) { - FI_WARN(&rxm_prov, FI_LOG_FABRIC, "Unable to bind msg EP to shared RX ctx\n"); - goto err; + if (rxm_ep->srx_ctx) { + ret = fi_ep_bind(msg_ep, &rxm_ep->srx_ctx->fid, 0); + if (ret) { + FI_WARN(&rxm_prov, FI_LOG_FABRIC, + "Unable to bind msg EP to shared RX ctx\n"); + goto err; + } } // TODO add other completion flags @@ -77,6 +80,13 @@ FI_WARN(&rxm_prov, FI_LOG_EP_CTRL, "Unable to enable msg_ep\n"); goto err; } + + if (!rxm_ep->srx_ctx) { + ret = rxm_ep_prepost_buf(rxm_ep, msg_ep); + if (ret) + goto err; + } + rxm_conn->msg_ep = msg_ep; return 0; err: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/prov/rxm/src/rxm_ep.c new/libfabric-1.5.2.0.480a6db3/prov/rxm/src/rxm_ep.c --- old/libfabric-1.5.1.0.476d147d/prov/rxm/src/rxm_ep.c 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/prov/rxm/src/rxm_ep.c 2017-11-08 22:12:47.000000000 +0100 @@ -283,14 +283,14 @@ rx_buf->hdr.state = RXM_RX; rx_buf->ep = rxm_ep; - ret = fi_recv(rx_buf->ep->srx_ctx, &rx_buf->pkt, RXM_BUF_SIZE, + ret = fi_recv(rx_buf->hdr.msg_ep, &rx_buf->pkt, RXM_BUF_SIZE, rx_buf->hdr.desc, FI_ADDR_UNSPEC, rx_buf); if (ret) FI_WARN(&rxm_prov, FI_LOG_EP_CTRL, "Unable to repost buf\n"); return ret; } -int rxm_ep_prepost_buf(struct rxm_ep *rxm_ep) +int rxm_ep_prepost_buf(struct rxm_ep *rxm_ep, struct fid_ep *msg_ep) { struct rxm_rx_buf *rx_buf; int ret; @@ -298,8 +298,8 @@ for (i = 0; i < rxm_ep->msg_info->rx_attr->size; i++) { rx_buf = (struct rxm_rx_buf *)rxm_buf_get(&rxm_ep->rx_pool); - rx_buf->hdr.state = RXM_RX, - rx_buf->hdr.msg_ep = rxm_ep->srx_ctx; + rx_buf->hdr.state = RXM_RX; + rx_buf->hdr.msg_ep = msg_ep; rx_buf->ep = rxm_ep; ret = rxm_ep_repost_buf(rx_buf); if (ret) { @@ -612,6 +612,7 @@ struct fid_mr **mr_iov; size_t pkt_size = 0; ssize_t size; + uint8_t progress = 0; int ret; rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid); @@ -711,6 +712,7 @@ /* release allocated buffer for further reuse */ goto done; } else { + progress = 1; FI_DBG(&rxm_prov, FI_LOG_EP_DATA, "passed data (size = %d) is too " "big for MSG provider (max inject size = %d) \n", (int)pkt_size, rxm_ep->msg_info->tx_attr->inject_size); @@ -719,9 +721,13 @@ ret = fi_send(rxm_conn->msg_ep, pkt, pkt_size, tx_buf->hdr.desc, 0, tx_entry); if (ret) { - if (ret != -FI_EAGAIN) + if ((ret == -FI_EAGAIN) && progress) { + progress = 0; + rxm_cq_progress(rxm_ep); + } else { FI_WARN(&rxm_prov, FI_LOG_EP_DATA, "fi_send for MSG provider failed\n"); + } goto done; } return 0; @@ -936,10 +942,13 @@ retv = ret; } - ret = fi_close(&rxm_ep->srx_ctx->fid); - if (ret) { - FI_WARN(&rxm_prov, FI_LOG_EP_CTRL, "Unable to close msg shared ctx\n"); - retv = ret; + if (rxm_ep->srx_ctx) { + ret = fi_close(&rxm_ep->srx_ctx->fid); + if (ret) { + FI_WARN(&rxm_prov, FI_LOG_EP_CTRL, \ + "Unable to close msg shared ctx\n"); + retv = ret; + } } fi_freeinfo(rxm_ep->msg_info); @@ -1064,11 +1073,13 @@ if (!rxm_ep->util_ep.av) return -FI_EOPBADSTATE; - ret = rxm_ep_prepost_buf(rxm_ep); - if (ret) { - FI_WARN(&rxm_prov, FI_LOG_EP_CTRL, + if (rxm_ep->srx_ctx) { + ret = rxm_ep_prepost_buf(rxm_ep, rxm_ep->srx_ctx); + if (ret) { + FI_WARN(&rxm_prov, FI_LOG_EP_CTRL, "Unable to prepost recv bufs\n"); - return ret; + return ret; + } } break; default: @@ -1128,6 +1139,35 @@ return ret; } +static int rxm_info_to_core_srx_ctx(uint32_t version, struct fi_info *rxm_hints, + struct fi_info *core_hints) +{ + int ret; + + ret = rxm_info_to_core(version, rxm_hints, core_hints); + if (ret) + return ret; + core_hints->ep_attr->rx_ctx_cnt = FI_SHARED_CONTEXT; + return 0; +} + +static int rxm_ep_get_core_info(uint32_t version, struct fi_info *hints, + struct fi_info **info) +{ + int ret; + + ret = ofi_get_core_info(version, NULL, NULL, 0, &rxm_util_prov, hints, + rxm_info_to_core_srx_ctx, info); + if (!ret) + return 0; + + FI_WARN(&rxm_prov, FI_LOG_EP_CTRL, "Shared receive context not " + "supported by MSG provider.\n"); + + return ofi_get_core_info(version, NULL, NULL, 0, &rxm_util_prov, hints, + rxm_info_to_core, info); +} + static int rxm_ep_msg_res_open(struct fi_info *rxm_fi_info, struct util_domain *util_domain, struct rxm_ep *rxm_ep) { @@ -1135,9 +1175,8 @@ struct fi_cq_attr cq_attr; int ret; - ret = ofi_get_core_info(util_domain->fabric->fabric_fid.api_version, - NULL, NULL, 0, &rxm_util_prov, rxm_fi_info, - rxm_info_to_core, &rxm_ep->msg_info); + ret = rxm_ep_get_core_info(util_domain->fabric->fabric_fid.api_version, + rxm_fi_info, &rxm_ep->msg_info); if (ret) return ret; @@ -1156,11 +1195,14 @@ goto err1; } - ret = fi_srx_context(rxm_domain->msg_domain, rxm_ep->msg_info->rx_attr, - &rxm_ep->srx_ctx, NULL); - if (ret) { - FI_WARN(&rxm_prov, FI_LOG_FABRIC, "Unable to open shared receive context\n"); - goto err2; + if (rxm_ep->msg_info->ep_attr->rx_ctx_cnt == FI_SHARED_CONTEXT) { + ret = fi_srx_context(rxm_domain->msg_domain, rxm_ep->msg_info->rx_attr, + &rxm_ep->srx_ctx, NULL); + if (ret) { + FI_WARN(&rxm_prov, FI_LOG_EP_CTRL, + "Unable to open shared receive context\n"); + goto err2; + } } ret = rxm_listener_open(rxm_ep); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/prov/rxm/src/rxm_init.c new/libfabric-1.5.2.0.480a6db3/prov/rxm/src/rxm_init.c --- old/libfabric-1.5.1.0.476d147d/prov/rxm/src/rxm_init.c 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/prov/rxm/src/rxm_init.c 2017-11-08 22:12:47.000000000 +0100 @@ -63,6 +63,10 @@ } core_info->domain_attr->caps |= hints->domain_attr->caps; } + if (hints->tx_attr) { + core_info->tx_attr->msg_order = hints->tx_attr->msg_order; + core_info->tx_attr->comp_order = hints->tx_attr->comp_order; + } } else { /* Since hints is NULL fake support for FI_MR_BASIC to allow * discovery of core providers like verbs which require it */ @@ -73,7 +77,10 @@ * FI_MR_SCALABLE aren't dropped */ core_info->domain_attr->mr_mode = FI_MR_UNSPEC; } - core_info->ep_attr->rx_ctx_cnt = FI_SHARED_CONTEXT; + + /* Remove caps that RxM can handle */ + core_info->rx_attr->msg_order &= ~FI_ORDER_SAS; + core_info->ep_attr->type = FI_EP_MSG; return 0; @@ -87,6 +94,9 @@ *info->tx_attr = *rxm_info.tx_attr; + info->tx_attr->msg_order = core_info->tx_attr->msg_order; + info->tx_attr->comp_order = core_info->tx_attr->comp_order; + /* Export TX queue size same as that of MSG provider as we post TX * operations directly */ info->tx_attr->size = core_info->tx_attr->size; @@ -98,6 +108,9 @@ *info->rx_attr = *rxm_info.rx_attr; info->rx_attr->iov_limit = MIN(info->rx_attr->iov_limit, core_info->rx_attr->iov_limit); + /* Only SAS recv ordering can be guaranteed as RMA ops are not handled + * by RxM protocol */ + info->rx_attr->msg_order |= FI_ORDER_SAS; *info->ep_attr = *rxm_info.ep_attr; info->ep_attr->max_msg_size = core_info->ep_attr->max_msg_size; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/prov/rxm/src/rxm_rma.c new/libfabric-1.5.2.0.480a6db3/prov/rxm/src/rxm_rma.c --- old/libfabric-1.5.1.0.476d147d/prov/rxm/src/rxm_rma.c 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/prov/rxm/src/rxm_rma.c 2017-11-08 22:12:47.000000000 +0100 @@ -183,10 +183,12 @@ tx_buf = (struct rxm_tx_buf *)rxm_buf_get(&rxm_ep->tx_pool); if (!tx_buf) { FI_WARN(&rxm_prov, FI_LOG_CQ, "TX queue full!\n"); + rxm_cq_progress(rxm_ep); return -FI_EAGAIN; } if (!(tx_entry = rxm_tx_entry_get(&rxm_ep->send_queue))) { + rxm_cq_progress(rxm_ep); ret = -FI_EAGAIN; goto err1; } @@ -216,8 +218,11 @@ flags = (flags & ~FI_INJECT) | FI_COMPLETION; ret = fi_writemsg(msg_ep, &msg_rma, flags); - if (ret) + if (ret) { + if (ret == -FI_EAGAIN) + rxm_cq_progress(rxm_ep); goto err2; + } return 0; err2: rxm_tx_entry_release(&rxm_ep->send_queue, tx_entry); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/prov/sockets/src/sock_cntr.c new/libfabric-1.5.2.0.480a6db3/prov/sockets/src/sock_cntr.c --- old/libfabric-1.5.1.0.476d147d/prov/sockets/src/sock_cntr.c 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/prov/sockets/src/sock_cntr.c 2017-11-08 22:12:47.000000000 +0100 @@ -512,7 +512,7 @@ goto err; if (attr == NULL) - memcpy(&_cntr->attr, &sock_cntr_add, sizeof(sock_cntr_attr)); + memcpy(&_cntr->attr, &sock_cntr_attr, sizeof(sock_cntr_attr)); else memcpy(&_cntr->attr, attr, sizeof(sock_cntr_attr)); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/prov/util/src/util_av.c new/libfabric-1.5.2.0.480a6db3/prov/util/src/util_av.c --- old/libfabric-1.5.1.0.476d147d/prov/util/src/util_av.c 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/prov/util/src/util_av.c 2017-11-08 22:12:47.000000000 +0100 @@ -307,27 +307,27 @@ */ static void util_av_hash_remove(struct util_av_hash *hash, int slot, int index) { - int i; + int i, slot_next; if (slot < 0 || slot >= hash->slots) return; - if (slot == index) { + if (hash->table[slot].index == index) { if (hash->table[slot].next == UTIL_NO_ENTRY) { hash->table[slot].index = UTIL_NO_ENTRY; return; - } else { - index = hash->table[slot].next; - hash->table[slot] = hash->table[index]; } } else { - for (i = slot; hash->table[i].next != index; ) + for (i = slot; hash->table[i].index != index; ) i = hash->table[i].next; - - hash->table[i].next = hash->table[index].next; + slot = i; } - hash->table[index].next = hash->free_list; - hash->free_list = index; + + slot_next = hash->table[slot].next; + hash->table[slot] = hash->table[slot_next]; + + hash->table[slot_next].next = hash->free_list; + hash->free_list = slot_next; } int ofi_av_remove_addr(struct util_av *av, int slot, int index) @@ -361,7 +361,7 @@ dlist_foreach(&av->ep_list, av_entry) { ep = container_of(av_entry, struct util_ep, av_entry); - if (ep->cmap) + if (ep->cmap && ep->cmap->handles_av[index]) ofi_cmap_del_handle(ep->cmap->handles_av[index]); } @@ -473,7 +473,10 @@ if (util_attr->flags & FI_SOURCE) { av->hash.slots = av->count; - av->hash.total_count = av->count + util_attr->overhead; + if (util_attr->overhead) + av->hash.total_count = av->count + util_attr->overhead; + else + av->hash.total_count = av->count * 2; FI_INFO(av->prov, FI_LOG_AV, "FI_SOURCE requested, hash size %zu\n", av->hash.total_count); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/prov/verbs/src/ep_rdm/verbs_ep_rdm.c new/libfabric-1.5.2.0.480a6db3/prov/verbs/src/ep_rdm/verbs_ep_rdm.c --- old/libfabric-1.5.1.0.476d147d/prov/verbs/src/ep_rdm/verbs_ep_rdm.c 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/prov/verbs/src/ep_rdm/verbs_ep_rdm.c 2017-11-08 22:12:47.000000000 +0100 @@ -572,6 +572,11 @@ } else { _ep->use_odp = param; } + } else { + /* Disable by default. Because this feature may corrupt + * data due to IBV_EXP_ACCESS_RELAXED flag. But usage + * this feature w/o this flag leads to poor bandwidth */ + _ep->use_odp = 0; } _ep->rq_wr_depth = info->rx_attr->size; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libfabric-1.5.1.0.476d147d/prov/verbs/src/verbs_info.c new/libfabric-1.5.2.0.480a6db3/prov/verbs/src/verbs_info.c --- old/libfabric-1.5.1.0.476d147d/prov/verbs/src/verbs_info.c 2017-10-04 19:21:50.000000000 +0200 +++ new/libfabric-1.5.2.0.480a6db3/prov/verbs/src/verbs_info.c 2017-11-08 22:12:47.000000000 +0100 @@ -211,20 +211,32 @@ } if (attr->tx_ctx_cnt > info->domain_attr->max_ep_tx_ctx) { - VERBS_INFO(FI_LOG_CORE, - "tx_ctx_cnt exceeds supported size\n"); - VERBS_INFO(FI_LOG_CORE, "Supported: %zd\nRequested: %zd\n", - info->domain_attr->max_ep_tx_ctx, attr->tx_ctx_cnt); - return -FI_ENODATA; + if (attr->tx_ctx_cnt != FI_SHARED_CONTEXT) { + VERBS_INFO(FI_LOG_CORE, + "tx_ctx_cnt exceeds supported size\n"); + VERBS_INFO(FI_LOG_CORE, "Supported: %zd\nRequested: %zd\n", + info->domain_attr->max_ep_tx_ctx, attr->tx_ctx_cnt); + return -FI_ENODATA; + } else if (!info->domain_attr->max_ep_stx_ctx) { + VERBS_INFO(FI_LOG_CORE, + "Shared tx context not supported\n"); + return -FI_ENODATA; + } } - if ((attr->rx_ctx_cnt > info->domain_attr->max_ep_rx_ctx) && - (attr->rx_ctx_cnt != FI_SHARED_CONTEXT)) { - VERBS_INFO(FI_LOG_CORE, - "rx_ctx_cnt exceeds supported size\n"); - VERBS_INFO(FI_LOG_CORE, "Supported: %zd\nRequested: %zd\n", - info->domain_attr->max_ep_rx_ctx, attr->rx_ctx_cnt); - return -FI_ENODATA; + if ((attr->rx_ctx_cnt > info->domain_attr->max_ep_rx_ctx)) { + if (attr->rx_ctx_cnt != FI_SHARED_CONTEXT) { + VERBS_INFO(FI_LOG_CORE, + "rx_ctx_cnt exceeds supported size\n"); + VERBS_INFO(FI_LOG_CORE, "Supported: %zd\nRequested: %zd\n", + info->domain_attr->max_ep_rx_ctx, + attr->rx_ctx_cnt); + return -FI_ENODATA; + } else if (!info->domain_attr->max_ep_srx_ctx) { + VERBS_INFO(FI_LOG_CORE, + "Shared rx context not supported\n"); + return -FI_ENODATA; + } } if (attr->auth_key_size && @@ -252,8 +264,8 @@ compare_mode = attr->mode ? attr->mode : hints->mode; - check_mode = (hints->caps & FI_RMA) ? info->rx_attr->mode : - (info->rx_attr->mode & ~FI_RX_CQ_DATA); + check_mode = (hints->domain_attr && hints->domain_attr->cq_data_size) ? + info->rx_attr->mode : (info->rx_attr->mode & ~FI_RX_CQ_DATA); if ((compare_mode & check_mode) != check_mode) { VERBS_INFO(FI_LOG_CORE, @@ -571,7 +583,7 @@ info->domain_attr->rx_ctx_cnt = MIN(info->domain_attr->rx_ctx_cnt, device_attr.max_qp); info->domain_attr->max_ep_tx_ctx = MIN(info->domain_attr->tx_ctx_cnt, device_attr.max_qp); info->domain_attr->max_ep_rx_ctx = MIN(info->domain_attr->rx_ctx_cnt, device_attr.max_qp); - info->domain_attr->max_ep_srx_ctx = device_attr.max_qp; + info->domain_attr->max_ep_srx_ctx = device_attr.max_srq; info->domain_attr->mr_cnt = device_attr.max_mr; if (info->ep_attr->type == FI_EP_RDM) @@ -585,8 +597,10 @@ MIN(device_attr.max_qp_wr, device_attr.max_srq_wr) : device_attr.max_qp_wr; - info->rx_attr->iov_limit = MIN(device_attr.max_sge, - device_attr.max_srq_sge); + info->rx_attr->iov_limit = device_attr.max_srq_sge ? + MIN(device_attr.max_sge, + device_attr.max_srq_sge) : + device_attr.max_sge; ret = fi_ibv_get_qp_cap(ctx, info); if (ret) @@ -726,6 +740,11 @@ fi->ep_attr->protocol = FI_PROTO_IWARP_RDM; fi->tx_attr->op_flags = VERBS_TX_OP_FLAGS_IWARP_RDM; } + + /* TODO Some iWarp HW may support immediate data as per RFC 7306 + * (RDMA Protocol Extensions). Update this to figure out if the + * hw supports immediate data dynamically */ + fi->domain_attr->cq_data_size = 0; break; default: VERBS_INFO(FI_LOG_CORE, "Unknown transport type\n"); @@ -1002,8 +1021,12 @@ struct sockaddr *local_addr; int ret; - if (rai->ai_src_addr && (((*info)->ep_attr->type == FI_EP_MSG) || - !ofi_is_loopback_addr(rai->ai_src_addr))) + /* + * TODO MPICH CH3 doesn't work with verbs provider without skipping the + * loopback address. An alternative approach if there is one is needed + * to allow both. + */ + if (rai->ai_src_addr && !ofi_is_loopback_addr(rai->ai_src_addr)) goto rai_to_fi; if (!id->verbs) @@ -1121,12 +1144,12 @@ size_t default_attr, char *attr_str) { if (default_attr > *attr) { - VERBS_WARN(FI_LOG_FABRIC, "%s supported by domain: %s is less " - "than provider's default\n", attr_str, - info->domain_attr->name); - return -FI_EINVAL; + VERBS_WARN(FI_LOG_FABRIC, "Ignoring provider default value " + "for %s as it is greater than the value supported " + "by domain: %s\n", attr_str, info->domain_attr->name); + } else { + *attr = default_attr; } - *attr = default_attr; return 0; } @@ -1224,6 +1247,7 @@ struct rdma_cm_id *id = NULL; struct rdma_addrinfo *rai; const char *dev_name = NULL; + struct fi_info *cur; int ret; ret = fi_ibv_init_info(); @@ -1248,6 +1272,11 @@ } ofi_alter_info(*info, hints, version); + + if (!hints || !(hints->mode & FI_RX_CQ_DATA)) { + for (cur = *info; cur; cur = cur->next) + cur->domain_attr->cq_data_size = 0; + } err: fi_ibv_destroy_ep(rai, &id); out: