Hi Zhijian, On Mon, Aug 23, 2021 at 4:41 AM lizhij...@fujitsu.com <lizhij...@fujitsu.com> wrote: > > > > On 22/08/2021 16:53, Marcel Apfelbaum wrote: > > Hi > > > > On Sat, Jul 31, 2021 at 5:00 PM Li Zhijian <lizhij...@cn.fujitsu.com> wrote: > >> Previously, for the fsdax mem-backend-file, it will register failed with > >> Operation not supported. In this case, we can try to register it with > >> On-Demand Paging[1] like what rpma_mr_reg() does on rpma[2]. > >> > >> [1]: > >> https://community.mellanox.com/s/article/understanding-on-demand-paging--odp-x > >> [2]: http://pmem.io/rpma/manpages/v0.9.0/rpma_mr_reg.3 > >> Signed-off-by: Li Zhijian <lizhij...@cn.fujitsu.com> > >> --- > >> migration/rdma.c | 27 ++++++++++++++++++--------- > >> migration/trace-events | 1 + > >> 2 files changed, 19 insertions(+), 9 deletions(-) > >> > >> diff --git a/migration/rdma.c b/migration/rdma.c > >> index 5c2d113aa94..8784b5f22a6 100644 > >> --- a/migration/rdma.c > >> +++ b/migration/rdma.c > >> @@ -1123,15 +1123,21 @@ static int > >> qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma) > >> RDMALocalBlocks *local = &rdma->local_ram_blocks; > >> > >> for (i = 0; i < local->nb_blocks; i++) { > >> + int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE; > >> + > >> +on_demand: > >> local->block[i].mr = > >> ibv_reg_mr(rdma->pd, > >> local->block[i].local_host_addr, > >> - local->block[i].length, > >> - IBV_ACCESS_LOCAL_WRITE | > >> - IBV_ACCESS_REMOTE_WRITE > >> + local->block[i].length, access > >> ); > >> if (!local->block[i].mr) { > >> - perror("Failed to register local dest ram block!"); > >> + if (!(access & IBV_ACCESS_ON_DEMAND) && errno == ENOTSUP) { > >> + access |= IBV_ACCESS_ON_DEMAND; > >> + > >> trace_qemu_rdma_register_odp_mr(local->block[i].block_name); > >> + goto on_demand; > > Wouldn't it be better to check first if the device supports ODP ? > > Something like: > > ret = ibv_exp_query_device(context, &dattr); > > if (dattr.exp_device_cap_flags & IBV_EXP_DEVICE_ODP)... > > Good idea ! > > > > > > > Also, I am not (personally) too fond of the "on_demand" label usage here, > > however I will let the maintainer/others decide. > Indeed, how just repeating the ibv_reg_mr() instead of a 'go to' >
Any "boring"/standard approach would do. Thanks, Marcel > Thanks > Zhijian > > > > > > > Thanks, > > Marcel > > > >> + } > >> + perror("Failed to register local dest ram block!\n"); > >> break; > >> } > >> rdma->total_registrations++; > >> @@ -1215,15 +1221,18 @@ static int > >> qemu_rdma_register_and_get_keys(RDMAContext *rdma, > >> */ > >> if (!block->pmr[chunk]) { > >> uint64_t len = chunk_end - chunk_start; > >> + int access = rkey ? IBV_ACCESS_LOCAL_WRITE | > >> IBV_ACCESS_REMOTE_WRITE : 0; > >> > >> trace_qemu_rdma_register_and_get_keys(len, chunk_start); > >> > >> - block->pmr[chunk] = ibv_reg_mr(rdma->pd, > >> - chunk_start, len, > >> - (rkey ? (IBV_ACCESS_LOCAL_WRITE | > >> - IBV_ACCESS_REMOTE_WRITE) : 0)); > >> - > >> +on_demand: > >> + block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, > >> access); > >> if (!block->pmr[chunk]) { > >> + if (!(access & IBV_ACCESS_ON_DEMAND) && errno == ENOTSUP) { > >> + access |= IBV_ACCESS_ON_DEMAND; > >> + trace_qemu_rdma_register_odp_mr(block->block_name); > >> + goto on_demand; > >> + } > >> perror("Failed to register chunk!"); > >> fprintf(stderr, "Chunk details: block: %d chunk index %d" > >> " start %" PRIuPTR " end %" PRIuPTR > >> diff --git a/migration/trace-events b/migration/trace-events > >> index a1c0f034ab8..5f6aa580def 100644 > >> --- a/migration/trace-events > >> +++ b/migration/trace-events > >> @@ -212,6 +212,7 @@ qemu_rdma_poll_write(const char *compstr, int64_t > >> comp, int left, uint64_t block > >> qemu_rdma_poll_other(const char *compstr, int64_t comp, int left) "other > >> completion %s (%" PRId64 ") received left %d" > >> qemu_rdma_post_send_control(const char *desc) "CONTROL: sending %s.." > >> qemu_rdma_register_and_get_keys(uint64_t len, void *start) "Registering > >> %" PRIu64 " bytes @ %p" > >> +qemu_rdma_register_odp_mr(const char *name) "Try to register On-Demand > >> Paging memory region: %s" > >> qemu_rdma_registration_handle_compress(int64_t length, int index, > >> int64_t offset) "Zapping zero chunk: %" PRId64 " bytes, index %d, offset > >> %" PRId64 > >> qemu_rdma_registration_handle_finished(void) "" > >> qemu_rdma_registration_handle_ram_blocks(void) "" > >> -- > >> 2.31.1 > >> > >> > >> > >> > >