Previously, registering a memory region backed by an fsdax mem-backend-file
failed with "Operation not supported" (ENOTSUP). In this case, we can retry
the registration with On-Demand Paging[1], as rpma_mr_reg() does in rpma[2].
[1]:
https://community.mellanox.com/s/article/understanding-on-demand-paging--odp-x
[2]: http://pmem.io/rpma/manpages/v0.9.0/rpma_mr_reg.3
CC: Marcel Apfelbaum
Signed-off-by: Li Zhijian
Reviewed-by: Marcel Apfelbaum
---
V2: add ODP sanity check and remove goto
---
migration/rdma.c | 73 ++
migration/trace-events | 1 +
2 files changed, 54 insertions(+), 20 deletions(-)
diff --git a/migration/rdma.c b/migration/rdma.c
index 5c2d113aa94..eb80431aae2 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -1117,19 +1117,47 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma)
return 0;
}
+/*
+ * Check whether On-Demand Paging (ODP) is supported by the RDMA device.
+ *
+ * @dev: verbs context of the device to query.
+ *
+ * Returns true only if the extended device query succeeds and the
+ * IBV_ODP_SUPPORT bit is set in the reported general ODP capabilities;
+ * returns false otherwise, including when the query itself fails.
+ */
+static bool rdma_support_odp(struct ibv_context *dev)
+{
+    struct ibv_device_attr_ex attr = {0};
+    /* No extra query input is needed, hence the NULL second argument. */
+    int ret = ibv_query_device_ex(dev, NULL, &attr);
+    if (ret) {
+        return false;
+    }
+
+    if (attr.odp_caps.general_caps & IBV_ODP_SUPPORT) {
+        return true;
+    }
+
+    return false;
+}
+
static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma)
{
int i;
RDMALocalBlocks *local = &rdma->local_ram_blocks;
for (i = 0; i < local->nb_blocks; i++) {
+int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE;
+
local->block[i].mr =
ibv_reg_mr(rdma->pd,
local->block[i].local_host_addr,
-local->block[i].length,
-IBV_ACCESS_LOCAL_WRITE |
-IBV_ACCESS_REMOTE_WRITE
+local->block[i].length, access
);
+
+if (!local->block[i].mr &&
+errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
+access |= IBV_ACCESS_ON_DEMAND;
+/* register ODP mr */
+local->block[i].mr =
+ibv_reg_mr(rdma->pd,
+ local->block[i].local_host_addr,
+ local->block[i].length, access);
+trace_qemu_rdma_register_odp_mr(local->block[i].block_name);
+}
+
if (!local->block[i].mr) {
perror("Failed to register local dest ram block!");
break;
@@ -1215,28 +1243,33 @@ static int qemu_rdma_register_and_get_keys(RDMAContext
*rdma,
*/
if (!block->pmr[chunk]) {
uint64_t len = chunk_end - chunk_start;
+int access = rkey ? IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE :
+ 0;
trace_qemu_rdma_register_and_get_keys(len, chunk_start);
-block->pmr[chunk] = ibv_reg_mr(rdma->pd,
-chunk_start, len,
-(rkey ? (IBV_ACCESS_LOCAL_WRITE |
-IBV_ACCESS_REMOTE_WRITE) : 0));
-
-if (!block->pmr[chunk]) {
-perror("Failed to register chunk!");
-fprintf(stderr, "Chunk details: block: %d chunk index %d"
-" start %" PRIuPTR " end %" PRIuPTR
-" host %" PRIuPTR
-" local %" PRIuPTR " registrations: %d\n",
-block->index, chunk, (uintptr_t)chunk_start,
-(uintptr_t)chunk_end, host_addr,
-(uintptr_t)block->local_host_addr,
-rdma->total_registrations);
-return -1;
+block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access);
+if (!block->pmr[chunk] &&
+errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
+access |= IBV_ACCESS_ON_DEMAND;
+/* register ODP mr */
+block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access);
+trace_qemu_rdma_register_odp_mr(block->block_name);
}
-rdma->total_registrations++;
}
+if (!block->pmr[chunk]) {
+perror("Failed to register chunk!");
+fprintf(stderr, "Chunk details: block: %d chunk index %d"
+" start %" PRIuPTR " end %" PRIuPTR
+" host %" PRIuPTR
+" local %" PRIuPTR " registrations: %d\n",
+block->index, chunk, (uintptr_t)chunk_start,
+(uintptr_t)chunk_end, host_addr,
+(uintptr_t)block->local_host_addr,
+rdma->total_registrations);
+return -1;
+}
+rdma->total_registrations++;
if (lkey) {
*lkey = block->pmr[chunk]->lkey;
diff --git a/migration/trace-events b/migration/trace-events
index a1c0f034ab8..5f6aa580def 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -212,6 +212,7 @@ qemu_rdma_poll_write(const char *compstr, int64_t comp, int
left, uint64_t block