On Fri, Mar 06, 2026 at 12:11:51PM +0000, Alireza Sanaee wrote: > The CXL address to device decoding logic is complex because of the need to > correctly decode fine grained interleave. The current implementation > prevents use with KVM where executed instructions may reside in that memory > and gives very slow performance even in TCG. > > In many real cases non interleaved memory configurations are useful and for > those we can use a more conventional memory region alias allowing similar > performance to other memory in the system. > > Whether this fast path is applicable can be established once the full set > of HDM decoders has been committed (in whatever order the guest decides to > commit them). As such a check is performed on each commit/uncommit of HDM > decoder to establish if the alias should be added or removed. > > Reviewed-by: Li Zhijian <[email protected]> > Co-developed-by: Jonathan Cameron <[email protected]> > Signed-off-by: Jonathan Cameron <[email protected]> > Signed-off-by: Alireza Sanaee <[email protected]>
Reviewed-by: Gregory Price <[email protected]> Tested-by: Gregory Price <[email protected]> > --- > Thanks to Gregory, and Li Zhijian for their feedback. > v6->v7: > - Fixed a div by zero situation in the code for > interleaved_ways_dec func. > - Changed the signature of cfmws_update_non_interleaved function to > void. > > hw/cxl/cxl-component-utils.c | 6 ++ > hw/cxl/cxl-host.c | 197 +++++++++++++++++++++++++++++++++++ > hw/mem/cxl_type3.c | 4 + > include/hw/cxl/cxl.h | 1 + > include/hw/cxl/cxl_device.h | 4 + > 5 files changed, 212 insertions(+) > > diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c > index 07aabe331c..a624357978 100644 > --- a/hw/cxl/cxl-component-utils.c > +++ b/hw/cxl/cxl-component-utils.c > @@ -143,6 +143,12 @@ static void dumb_hdm_handler(CXLComponentState > *cxl_cstate, hwaddr offset, > value = FIELD_DP32(value, CXL_HDM_DECODER0_CTRL, COMMITTED, 0); > } > stl_le_p((uint8_t *)cache_mem + offset, value); > + > + if (should_commit) { > + cfmws_update_non_interleaved(true); > + } else if (should_uncommit) { > + cfmws_update_non_interleaved(false); > + } > } > > static void bi_handler(CXLComponentState *cxl_cstate, hwaddr offset, > diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c > index 2dc9f77007..079b27133b 100644 > --- a/hw/cxl/cxl-host.c > +++ b/hw/cxl/cxl-host.c > @@ -264,6 +264,203 @@ static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow > *fw, hwaddr addr, > return d; > } > > +typedef struct CXLDirectPTState { > + CXLType3Dev *ct3d; > + hwaddr decoder_base; > + hwaddr decoder_size; > + hwaddr dpa_base; > + unsigned int hdm_decoder_idx; > +} CXLDirectPTState; > + > +static void cxl_fmws_direct_passthrough_setup(CXLDirectPTState *state, > + CXLFixedWindow *fw) > +{ > + CXLType3Dev *ct3d = state->ct3d; > + MemoryRegion *mr = NULL; > + uint64_t vmr_size = 0, pmr_size = 0, offset = 0; > + MemoryRegion *direct_mr; > + g_autofree char *direct_mr_name; > + unsigned int idx = state->hdm_decoder_idx; > + > + if (ct3d->hostvmem) { > + MemoryRegion *vmr = host_memory_backend_get_memory(ct3d->hostvmem); > + > + vmr_size = memory_region_size(vmr); > + if (state->dpa_base < vmr_size) { > + mr = vmr; > + offset = state->dpa_base; > + } > + } > + if (!mr && ct3d->hostpmem) { > + MemoryRegion *pmr = host_memory_backend_get_memory(ct3d->hostpmem); > + > + pmr_size = memory_region_size(pmr); > + if (state->dpa_base - vmr_size < pmr_size) { > + mr = pmr; > + offset = state->dpa_base - vmr_size; > + } > + } > + if (!mr) { > + return; > + } > + > + if (ct3d->direct_mr_fw[idx]) { > + return; > + } > + > + direct_mr = &ct3d->direct_mr[idx]; > + direct_mr_name = g_strdup_printf("cxl-direct-mapping-alias-%u", idx); > + if (!direct_mr_name) { > + return; > + } > + > + memory_region_init_alias(direct_mr, OBJECT(ct3d), direct_mr_name, mr, > + offset, state->decoder_size); > + memory_region_add_subregion(&fw->mr, > + state->decoder_base - fw->base, direct_mr); > + ct3d->direct_mr_fw[idx] = fw; > +} > + > +static void cxl_fmws_direct_passthrough_remove(CXLType3Dev *ct3d, > + uint64_t decoder_base, > + unsigned int idx) > +{ > + CXLFixedWindow *owner_fw = ct3d->direct_mr_fw[idx]; > + MemoryRegion *direct_mr = &ct3d->direct_mr[idx]; > + > + if (!owner_fw) { > + return; > + } > + > + if (!memory_region_is_mapped(direct_mr)) { > + return; > + } > + > + if (cxl_cfmws_find_device(owner_fw, decoder_base, false)) { > + return; > + } > + > + memory_region_del_subregion(&owner_fw->mr, direct_mr); > + object_unparent(OBJECT(direct_mr)); > + ct3d->direct_mr_fw[idx] = NULL; > +} > + > +static int cxl_fmws_direct_passthrough(Object *obj, void *opaque) > +{ > + CXLDirectPTState *state = opaque; > + CXLFixedWindow *fw; > + > + if (!object_dynamic_cast(obj, TYPE_CXL_FMW)) { > + return 0; > + } > + > + fw = CXL_FMW(obj); > + > + /* Verify not interleaved */ > + if (!cxl_cfmws_find_device(fw, state->decoder_base, false)) { > + return 0; > + } > + > + cxl_fmws_direct_passthrough_setup(state, fw); > + > + return 0; > +} > + > +static int update_non_interleaved(Object *obj, void *opaque) > +{ > + const int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - > R_CXL_HDM_DECODER0_BASE_LO; > + bool commit = *(bool *)opaque; > + CXLType3Dev *ct3d; > + uint32_t *cache_mem; > + unsigned int hdm_count, i; > + int interleave_ways_dec; > + uint32_t cap; > + uint64_t dpa_base = 0; > + > + if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) { > + return 0; > + } > + > + ct3d = CXL_TYPE3(obj); > + cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers; > + cap = ldl_le_p(cache_mem + R_CXL_HDM_DECODER_CAPABILITY); > + hdm_count = cxl_decoder_count_dec(FIELD_EX32(cap, > + CXL_HDM_DECODER_CAPABILITY, > + DECODER_COUNT)); > + for (i = 0; i < hdm_count; i++) { > + uint64_t decoder_base, decoder_size, skip; > + uint32_t hdm_ctrl, low, high; > + int iw, committed; > + > + hdm_ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + i * > hdm_inc); > + committed = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED); > + > + /* > + * Optimization: Looking for a fully committed path; if the type 3 > HDM > + * decoder is not commmitted, it cannot lie on such a path. > + */ > + if (commit && !committed) { > + return 0; > + } > + > + low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_LO + > + i * hdm_inc); > + high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_HI + > + i * hdm_inc); > + skip = ((uint64_t)high << 32) | (low & 0xf0000000); > + dpa_base += skip; > + > + low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_LO + i * hdm_inc); > + high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_HI + i * > hdm_inc); > + decoder_size = ((uint64_t)high << 32) | (low & 0xf0000000); > + > + low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_LO + i * hdm_inc); > + high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_HI + i * > hdm_inc); > + decoder_base = ((uint64_t)high << 32) | (low & 0xf0000000); > + > + iw = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IW); > + > + if (iw == 0) { > + if (!commit) { > + cxl_fmws_direct_passthrough_remove(ct3d, decoder_base, i); > + } else { > + CXLDirectPTState state = { > + .ct3d = ct3d, > + .decoder_base = decoder_base, > + .decoder_size = decoder_size, > + .dpa_base = dpa_base, > + .hdm_decoder_idx = i, > + }; > + > + object_child_foreach_recursive(object_get_root(), > + cxl_fmws_direct_passthrough, > + &state); > + } > + } > + > + interleave_ways_dec = cxl_interleave_ways_dec(iw, &error_fatal); > + if (interleave_ways_dec == 0) { > + return 0; > + } > + > + dpa_base += decoder_size / interleave_ways_dec; > + } > + > + return 0; > +} > + > +void cfmws_update_non_interleaved(bool commit) > +{ > + /* > + * Walk endpoints to find both committed and uncommitted decoders, > + * then check if they are not interleaved (but the path is fully set up). > + */ > + object_child_foreach_recursive(object_get_root(), > + update_non_interleaved, &commit); > + > + return; > +} > + > static MemTxResult cxl_read_cfmws(void *opaque, hwaddr addr, uint64_t *data, > unsigned size, MemTxAttrs attrs) > { > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c > index 4739239da3..d9fc0bec8f 100644 > --- a/hw/mem/cxl_type3.c > +++ b/hw/mem/cxl_type3.c > @@ -427,6 +427,8 @@ static void hdm_decoder_commit(CXLType3Dev *ct3d, int > which) > ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); > > stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl); > + > + cfmws_update_non_interleaved(true); > } > > static void hdm_decoder_uncommit(CXLType3Dev *ct3d, int which) > @@ -442,6 +444,8 @@ static void hdm_decoder_uncommit(CXLType3Dev *ct3d, int > which) > ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 0); > > stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl); > + > + cfmws_update_non_interleaved(false); > } > > static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err) > diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h > index 998f495a98..d8cd8359d2 100644 > --- a/include/hw/cxl/cxl.h > +++ b/include/hw/cxl/cxl.h > @@ -71,4 +71,5 @@ CXLComponentState *cxl_usp_to_cstate(CXLUpstreamPort *usp); > typedef struct CXLDownstreamPort CXLDownstreamPort; > DECLARE_INSTANCE_CHECKER(CXLDownstreamPort, CXL_DSP, TYPE_CXL_DSP) > > +void cfmws_update_non_interleaved(bool commit); > #endif > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h > index 393f312217..ba551fa5f9 100644 > --- a/include/hw/cxl/cxl_device.h > +++ b/include/hw/cxl/cxl_device.h > @@ -685,6 +685,8 @@ typedef struct CXLSetFeatureInfo { > size_t data_size; > } CXLSetFeatureInfo; > > +typedef struct CXLFixedWindow CXLFixedWindow; > + > struct CXLSanitizeInfo; > > typedef struct CXLAlertConfig { > @@ -712,6 +714,8 @@ struct CXLType3Dev { > uint64_t sn; > > /* State */ > + MemoryRegion direct_mr[CXL_HDM_DECODER_COUNT]; > + CXLFixedWindow *direct_mr_fw[CXL_HDM_DECODER_COUNT]; > AddressSpace hostvmem_as; > AddressSpace hostpmem_as; > CXLComponentState cxl_cstate; > -- > 2.43.0 >
