On Mon, 13 Oct 2025 15:31:06 +0100 Alireza Sanaee via <[email protected]> wrote:
> The CXL address-to-device decoding logic is complex because of the
> need to correctly decode fine-grained interleave. The current
> implementation prevents use with KVM, where executed instructions may
> reside in that memory, and gives very slow performance even in TCG.
>
> In many real cases non-interleaved memory configurations are useful,
> and for those we can use a more conventional memory region alias,
> allowing similar performance to other memory in the system.
>
> Whether this fast path is applicable can be established once the full
> set of HDM decoders has been committed (in whatever order the guest
> decides to commit them). As such, a check is performed on each commit /
> uncommit of an HDM decoder to establish whether the alias should be
> added or removed.
>
> Co-developed-by: Jonathan Cameron <[email protected]>
> Signed-off-by: Jonathan Cameron <[email protected]>
> Signed-off-by: Alireza Sanaee <[email protected]>
> ---
> Thanks to Jonathan Cameron for feedback and help with this patch.
> hw/cxl/cxl-component-utils.c | 9 ++ > hw/cxl/cxl-host.c | 268 > ++++++++++++++++++++++++++++++++++- hw/mem/cxl_type3.c | > 4 + include/hw/cxl/cxl.h | 1 + > include/hw/cxl/cxl_device.h | 10 ++ > 5 files changed, 289 insertions(+), 3 deletions(-) > > diff --git a/hw/cxl/cxl-component-utils.c > b/hw/cxl/cxl-component-utils.c index 473895948b..f1ecd6ed22 100644 > --- a/hw/cxl/cxl-component-utils.c > +++ b/hw/cxl/cxl-component-utils.c > @@ -116,6 +116,15 @@ static void dumb_hdm_handler(CXLComponentState > *cxl_cstate, hwaddr offset, value = FIELD_DP32(value, > CXL_HDM_DECODER0_CTRL, COMMITTED, 0); } > stl_le_p((uint8_t *)cache_mem + offset, value); > + > + if (should_commit) { > + cfmws_update_non_interleaved(true); > + } > + > + if (should_uncommit) { > + cfmws_update_non_interleaved(false); > + } > + > } > > static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, > uint64_t value, diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c > index 0d891c651d..3a563af3bc 100644 > --- a/hw/cxl/cxl-host.c > +++ b/hw/cxl/cxl-host.c > @@ -12,6 +12,7 @@ > #include "qapi/error.h" > #include "system/qtest.h" > #include "hw/boards.h" > +#include "qemu/log.h" > > #include "qapi/qapi-visit-machine.h" > #include "hw/cxl/cxl.h" > @@ -104,7 +105,7 @@ void cxl_fmws_link_targets(Error **errp) > } > > static bool cxl_hdm_find_target(uint32_t *cache_mem, hwaddr addr, > - uint8_t *target) > + uint8_t *target, bool *interleaved) > { > int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - > R_CXL_HDM_DECODER0_BASE_LO; unsigned int hdm_count; > @@ -138,6 +139,11 @@ static bool cxl_hdm_find_target(uint32_t > *cache_mem, hwaddr addr, found = true; > ig_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IG); > iw_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IW); > + > + if (interleaved) { > + *interleaved = iw_enc != 0; > + } > + > target_idx = (addr / cxl_decode_ig(ig_enc)) % (1 << iw_enc); > > if (target_idx < 4) { > @@ -190,7 +196,7 @@ static PCIDevice > *cxl_cfmws_find_device(CXLFixedWindow *fw, 
hwaddr addr) > cache_mem = hb_cstate->crb.cache_mem_registers; > > - target_found = cxl_hdm_find_target(cache_mem, addr, &target); > + target_found = cxl_hdm_find_target(cache_mem, addr, &target, > NULL); if (!target_found) { > return NULL; > } > @@ -226,7 +232,7 @@ static PCIDevice > *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr) > cache_mem = usp_cstate->crb.cache_mem_registers; > > - target_found = cxl_hdm_find_target(cache_mem, addr, &target); > + target_found = cxl_hdm_find_target(cache_mem, addr, &target, > NULL); if (!target_found) { > return NULL; > } > @@ -248,6 +254,262 @@ static PCIDevice > *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr) return d; > } > > +static bool cfmws_is_not_interleaved(CXLFixedWindow *fw, hwaddr addr) > +{ > + PCIDevice *rp, *d; > + PCIHostState *hb; > + CXLComponentState *hb_cstate, *usp_cstate; > + CXLUpstreamPort *usp; > + uint32_t *cache_mem; > + bool target_found, interleaved; > + uint8_t target; > + > + addr = fw->base; > + if (fw->num_targets > 1) { > + return false; > + } > + > + hb = PCI_HOST_BRIDGE(fw->target_hbs[0]->cxl_host_bridge); > + if (!hb || !hb->bus || !pci_bus_is_cxl(hb->bus)) { > + return false; > + } > + if (cxl_get_hb_passthrough(hb)) { > + rp = pcie_find_port_first(hb->bus); > + if (!rp) { > + return false; > + } > + } else { > + hb_cstate = cxl_get_hb_cstate(hb); > + if (!hb_cstate) { > + return false; > + } > + cache_mem = hb_cstate->crb.cache_mem_registers; > + > + target_found = cxl_hdm_find_target(cache_mem, addr, &target, > + &interleaved); > + if (!target_found) { > + return false; > + } > + if (interleaved) { > + return false; > + } > + > + rp = pcie_find_port_by_pn(hb->bus, target); > + if (!rp) { > + return false; > + } > + } > + d = pci_bridge_get_sec_bus(PCI_BRIDGE(rp))->devices[0]; > + if (!d) { > + return false; > + } > + > + if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) { > + return true; > + } > + /* > + * Could also be a switch. 
Note only one level of switching > currently > + * supported. > + */ > + if (!object_dynamic_cast(OBJECT(d), TYPE_CXL_USP)) { > + return false; > + } > + usp = CXL_USP(d); > + > + usp_cstate = cxl_usp_to_cstate(usp); > + if (!usp_cstate) { > + return false; > + } > + > + cache_mem = usp_cstate->crb.cache_mem_registers; > + > + target_found = cxl_hdm_find_target(cache_mem, addr, &target, > + &interleaved); > + if (!target_found) { > + return false; > + } > + if (interleaved) { > + return false; > + } > + > + d = pcie_find_port_by_pn(&PCI_BRIDGE(d)->sec_bus, target); > + if (!d) { > + return false; > + } > + > + d = pci_bridge_get_sec_bus(PCI_BRIDGE(d))->devices[0]; > + if (!d) { > + return false; > + } > + > + if (!object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) { > + return false; > + } > + > + return true; > +} > + > +static int cxl_fmws_direct_passthrough(Object *obj, void *opaque) > +{ > + struct cxl_direct_pt_state *state = opaque; > + struct CXLFixedWindow *fw; > + CXLType3Dev *ct3d = state->ct3d; > + > + if (!object_dynamic_cast(obj, TYPE_CXL_FMW)) { > + return 0; > + } > + > + fw = CXL_FMW(obj); > + > + if (!cfmws_is_not_interleaved(fw, state->decoder_base)) { > + return 0; > + } > + > + if (state->commit) { > + MemoryRegion *mr = NULL; > + uint64_t vmr_size = 0, pmr_size = 0; > + uint64_t offset = 0; > + > + if (ct3d->hostvmem) { > + MemoryRegion *vmr = > host_memory_backend_get_memory(ct3d->hostvmem); > + vmr_size = memory_region_size(vmr); > + if (state->dpa_base < vmr_size) { > + mr = vmr; > + offset = state->dpa_base; > + } > + } > + if (!mr && ct3d->hostpmem) { > + MemoryRegion *pmr = > host_memory_backend_get_memory(ct3d->hostpmem); > + pmr_size = memory_region_size(pmr); > + if (state->dpa_base - vmr_size < pmr_size) { > + mr = pmr; > + offset = state->dpa_base - vmr_size; > + } > + } > + > + if (!mr) { > + return 0; > + } > + > + if > (memory_region_is_mapped(&ct3d->direct_mr[state->hdm_decoder_idx])) { > + return 0; > + } > + > + > 
memory_region_init_alias(&ct3d->direct_mr[state->hdm_decoder_idx], > + OBJECT(ct3d), "direct-mapping", mr, > offset, > + state->decoder_size); > + memory_region_add_subregion(&fw->mr, > + state->decoder_base - fw->base, > + > &ct3d->direct_mr[state->hdm_decoder_idx]); > + } else { > + if > (memory_region_is_mapped(&ct3d->direct_mr[state->hdm_decoder_idx])) { > + memory_region_del_subregion(&fw->mr, > + &ct3d->direct_mr[state->hdm_decoder_idx]); > + } > + } > + > + return 0; > +} > + > +static int update_non_interleaved(Object *obj, void *opaque) > +{ > + CXLType3Dev *ct3d; > + uint32_t *cache_mem; > + unsigned int hdm_count, i; > + uint32_t cap; > + int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - > R_CXL_HDM_DECODER0_BASE_LO; > + uint64_t dpa_base = 0; > + bool commit = *(bool *) opaque; > + > + if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) { > + return 0; > + } > + > + ct3d = CXL_TYPE3(obj); > + cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers; > + /* > + * Walk the decoders and find any committed with iw set to 0 > + * (non interleaved). > + */ > + cap = ldl_le_p(cache_mem + R_CXL_HDM_DECODER_CAPABILITY); > + hdm_count = cxl_decoder_count_dec(FIELD_EX32(cap, > + > CXL_HDM_DECODER_CAPABILITY, > + DECODER_COUNT)); > + > + /* Now for each committed HDM decoder */ > + for (i = 0; i < hdm_count; i++) { > + uint64_t decoder_base, decoder_size, skip; > + uint32_t hdm_ctrl, low, high; > + int iw, committed; > + > + hdm_ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + i > * hdm_inc); > + committed = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, > COMMITTED); > + if (commit) { > + if (!committed) { > + return 0; > + } > + } else { > + if (committed) { > + return 0; > + } > + } > + > + /* > + * Even if this decoder is interleaved need to keep track of > DPA as the > + * next HDM decoder may not be interleaved. 
> + */ > + low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_LO + > + i * hdm_inc); > + high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_HI + > + i * hdm_inc); > + skip = ((uint64_t)high << 32) | (low & 0xf0000000); > + dpa_base += skip; > + > + low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_LO + i * > hdm_inc); > + high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_HI + i * > hdm_inc); > + decoder_size = ((uint64_t)high << 32) | (low & 0xf0000000); > + iw = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IW); > + /* Get the HPA of the base */ > + low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_LO + i * > hdm_inc); > + high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_HI + i * > hdm_inc); > + decoder_base = ((uint64_t)high << 32) | (low & 0xf0000000); > + > +#ifdef DEBUG > + qemu_log("non interleaved decoder %lx %lx %lx %d\n", > decoder_base, > + decoder_size, dpa_base, commit); > +#endif > + > + /* Is it non interleaved? - need to check full path later */ > + if (iw == 0) { > + struct cxl_direct_pt_state state = { > + .ct3d = ct3d, > + .decoder_base = decoder_base, > + .decoder_size = decoder_size, > + .dpa_base = dpa_base, > + .hdm_decoder_idx = i, > + .commit = commit, > + }; > + object_child_foreach_recursive(object_get_root(), > + > cxl_fmws_direct_passthrough, &state); > + } > + dpa_base += decoder_size / cxl_interleave_ways_dec(iw, > &error_fatal); + > + } > + return 0; > +} > + > +bool cfmws_update_non_interleaved(bool commit) > +{ > + /* > + * Walk endpoints to find committed decoders then check if they > are not > + * interleaved (but path full is set up). 
> + */ > + object_child_foreach_recursive(object_get_root(), > + update_non_interleaved, &commit); > + > + return false; > +} > + > static MemTxResult cxl_read_cfmws(void *opaque, hwaddr addr, > uint64_t *data, unsigned size, MemTxAttrs attrs) > { > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c > index be609ff9d0..8cdb3bff7e 100644 > --- a/hw/mem/cxl_type3.c > +++ b/hw/mem/cxl_type3.c > @@ -427,6 +427,8 @@ static void hdm_decoder_commit(CXLType3Dev *ct3d, > int which) ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, > 1); > stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, > ctrl); + > + cfmws_update_non_interleaved(true); > } > > static void hdm_decoder_uncommit(CXLType3Dev *ct3d, int which) > @@ -442,6 +444,8 @@ static void hdm_decoder_uncommit(CXLType3Dev > *ct3d, int which) ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, > COMMITTED, 0); > stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, > ctrl); + > + cfmws_update_non_interleaved(false); > } > > static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err) > diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h > index 998f495a98..d5d2a9efb5 100644 > --- a/include/hw/cxl/cxl.h > +++ b/include/hw/cxl/cxl.h > @@ -71,4 +71,5 @@ CXLComponentState > *cxl_usp_to_cstate(CXLUpstreamPort *usp); typedef struct > CXLDownstreamPort CXLDownstreamPort; > DECLARE_INSTANCE_CHECKER(CXLDownstreamPort, CXL_DSP, TYPE_CXL_DSP) > +bool cfmws_update_non_interleaved(bool); > #endif > diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h > index 89411c8093..1d199d035e 100644 > --- a/include/hw/cxl/cxl_device.h > +++ b/include/hw/cxl/cxl_device.h > @@ -584,6 +584,7 @@ struct CXLType3Dev { > uint64_t sn; > > /* State */ > + MemoryRegion direct_mr[CXL_HDM_DECODER_COUNT]; > AddressSpace hostvmem_as; > AddressSpace hostpmem_as; > CXLComponentState cxl_cstate; > @@ -671,6 +672,15 @@ struct CSWMBCCIDev { > CXLCCI *cci; > }; > > +struct cxl_direct_pt_state { > + CXLType3Dev 
*ct3d; > + hwaddr decoder_base; > + hwaddr decoder_size; > + hwaddr dpa_base; > + unsigned int hdm_decoder_idx; > + bool commit; > +}; > + > #define TYPE_CXL_SWITCH_MAILBOX_CCI "cxl-switch-mailbox-cci" > OBJECT_DECLARE_TYPE(CSWMBCCIDev, CSWMBCCIClass, > CXL_SWITCH_MAILBOX_CCI)

Added anisa, linux-cxl, and linux-arm to the CC.
