Hi Zhijian, On Mon, Aug 23, 2021 at 11:42 AM lizhij...@fujitsu.com <lizhij...@fujitsu.com> wrote: > > CCing Marcel > > > On 23/08/2021 11:33, Li Zhijian wrote: > > Previously, for the fsdax mem-backend-file, registration would fail with > > "Operation not supported". In this case, we can try to register it with > > On-Demand Paging[1], as rpma_mr_reg() does in rpma[2]. > > > > [1]: > > https://community.mellanox.com/s/article/understanding-on-demand-paging--odp-x > > [2]: http://pmem.io/rpma/manpages/v0.9.0/rpma_mr_reg.3 > > > > CC: Marcel Apfelbaum <marcel.apfelb...@gmail.com> > > Signed-off-by: Li Zhijian <lizhij...@cn.fujitsu.com> > > > > --- > > V2: add ODP sanity check and remove goto > > --- > > migration/rdma.c | 73 ++++++++++++++++++++++++++++++------------ > > migration/trace-events | 1 + > > 2 files changed, 54 insertions(+), 20 deletions(-) > > > > diff --git a/migration/rdma.c b/migration/rdma.c > > index 5c2d113aa94..eb80431aae2 100644 > > --- a/migration/rdma.c > > +++ b/migration/rdma.c > > @@ -1117,19 +1117,47 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma) > > return 0; > > } > > > > +/* Check whether On-Demand Paging is supported by the RDMA device */ > > +static bool rdma_support_odp(struct ibv_context *dev) > > +{ > > + struct ibv_device_attr_ex attr = {0}; > > + int ret = ibv_query_device_ex(dev, NULL, &attr); > > + if (ret) { > > + return false; > > + } > > + > > + if (attr.odp_caps.general_caps & IBV_ODP_SUPPORT) { > > + return true; > > + } > > + > > + return false; > > +} > > + > > static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma) > > { > > int i; > > RDMALocalBlocks *local = &rdma->local_ram_blocks; > > > > for (i = 0; i < local->nb_blocks; i++) { > > + int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE; > > + > > local->block[i].mr = > > ibv_reg_mr(rdma->pd, > > local->block[i].local_host_addr, > > - local->block[i].length, > > - IBV_ACCESS_LOCAL_WRITE | > > - IBV_ACCESS_REMOTE_WRITE > > + local->block[i].length, 
access > > ); > > + > > + if (!local->block[i].mr && > > + errno == ENOTSUP && rdma_support_odp(rdma->verbs)) { > > + access |= IBV_ACCESS_ON_DEMAND; > > + /* register ODP mr */ > > + local->block[i].mr = > > + ibv_reg_mr(rdma->pd, > > + local->block[i].local_host_addr, > > + local->block[i].length, access); > > + > > trace_qemu_rdma_register_odp_mr(local->block[i].block_name); > > + } > > + > > if (!local->block[i].mr) { > > perror("Failed to register local dest ram block!"); > > break; > > @@ -1215,28 +1243,33 @@ static int > > qemu_rdma_register_and_get_keys(RDMAContext *rdma, > > */ > > if (!block->pmr[chunk]) { > > uint64_t len = chunk_end - chunk_start; > > + int access = rkey ? IBV_ACCESS_LOCAL_WRITE | > > IBV_ACCESS_REMOTE_WRITE : > > + 0; > > > > trace_qemu_rdma_register_and_get_keys(len, chunk_start); > > > > - block->pmr[chunk] = ibv_reg_mr(rdma->pd, > > - chunk_start, len, > > - (rkey ? (IBV_ACCESS_LOCAL_WRITE | > > - IBV_ACCESS_REMOTE_WRITE) : 0)); > > - > > - if (!block->pmr[chunk]) { > > - perror("Failed to register chunk!"); > > - fprintf(stderr, "Chunk details: block: %d chunk index %d" > > - " start %" PRIuPTR " end %" PRIuPTR > > - " host %" PRIuPTR > > - " local %" PRIuPTR " registrations: %d\n", > > - block->index, chunk, (uintptr_t)chunk_start, > > - (uintptr_t)chunk_end, host_addr, > > - (uintptr_t)block->local_host_addr, > > - rdma->total_registrations); > > - return -1; > > + block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access); > > + if (!block->pmr[chunk] && > > + errno == ENOTSUP && rdma_support_odp(rdma->verbs)) { > > + access |= IBV_ACCESS_ON_DEMAND; > > + /* register ODP mr */ > > + block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, > > access); > > + trace_qemu_rdma_register_odp_mr(block->block_name); > > } > > - rdma->total_registrations++; > > } > > + if (!block->pmr[chunk]) { > > + perror("Failed to register chunk!"); > > + fprintf(stderr, "Chunk details: block: %d chunk index %d" > > + " start %" PRIuPTR " 
end %" PRIuPTR > > + " host %" PRIuPTR > > + " local %" PRIuPTR " registrations: %d\n", > > + block->index, chunk, (uintptr_t)chunk_start, > > + (uintptr_t)chunk_end, host_addr, > > + (uintptr_t)block->local_host_addr, > > + rdma->total_registrations); > > + return -1; > > + } > > + rdma->total_registrations++; > > > > if (lkey) { > > *lkey = block->pmr[chunk]->lkey; > > diff --git a/migration/trace-events b/migration/trace-events > > index a1c0f034ab8..5f6aa580def 100644 > > --- a/migration/trace-events > > +++ b/migration/trace-events > > @@ -212,6 +212,7 @@ qemu_rdma_poll_write(const char *compstr, int64_t comp, > > int left, uint64_t block > > qemu_rdma_poll_other(const char *compstr, int64_t comp, int left) "other > > completion %s (%" PRId64 ") received left %d" > > qemu_rdma_post_send_control(const char *desc) "CONTROL: sending %s.." > > qemu_rdma_register_and_get_keys(uint64_t len, void *start) "Registering > > %" PRIu64 " bytes @ %p" > > +qemu_rdma_register_odp_mr(const char *name) "Try to register On-Demand > > Paging memory region: %s" > > qemu_rdma_registration_handle_compress(int64_t length, int index, int64_t > > offset) "Zapping zero chunk: %" PRId64 " bytes, index %d, offset %" PRId64 > > qemu_rdma_registration_handle_finished(void) "" > > qemu_rdma_registration_handle_ram_blocks(void) ""
Reviewed-by: Marcel Apfelbaum <marcel.apfelb...@gmail.com> Thanks, Marcel