On 04/02/2026 23:00, Alireza Sanaee wrote: > The CXL address to device decoding logic is complex because of the need > to correctly decode fine grained interleave. The current implementation > prevents use with KVM where executed instructions may reside in that > memory and gives very slow performance even in TCG.
Do you mean KVM will work after this patch in 1-way case > > In many real cases non interleaved memory configurations are useful and > for those we can use a more conventional memory region alias allowing > similar performance to other memory in the system. Do you have any benchmark data to demonstrate the performance improvement? > > Co-developed-by: Jonathan Cameron <[email protected]> > Signed-off-by: Jonathan Cameron <[email protected]> > Signed-off-by: Alireza Sanaee <[email protected]> > --- > hw/cxl/cxl-component-utils.c | 8 ++ > hw/cxl/cxl-host.c | 197 ++++++++++++++++++++++++++++++++++- > hw/mem/cxl_type3.c | 4 + > include/hw/cxl/cxl.h | 1 + > include/hw/cxl/cxl_device.h | 1 + > 5 files changed, 206 insertions(+), 5 deletions(-) > > diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c > index d36162e91b..d459d04b6d 100644 > --- a/hw/cxl/cxl-component-utils.c > +++ b/hw/cxl/cxl-component-utils.c > @@ -142,6 +142,14 @@ static void dumb_hdm_handler(CXLComponentState > *cxl_cstate, hwaddr offset, > value = FIELD_DP32(value, CXL_HDM_DECODER0_CTRL, COMMITTED, 0); > } > stl_le_p((uint8_t *)cache_mem + offset, value); > + > + if (should_commit) { > + cfmws_update_non_interleaved(true); > + } > + > + if (should_uncommit) { > + cfmws_update_non_interleaved(false); > + } This might be slightly better as an if-else block, since should_commit and should_uncommit are mutually exclusive: if (should_commit) { cfmws_update_non_interleaved(true);; } else if (should_uncommit) { cfmws_update_non_interleaved(false); } > } > > static void bi_handler(CXLComponentState *cxl_cstate, hwaddr offset, > diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c > index 9633b01abf..1fcfe01164 100644 > --- a/hw/cxl/cxl-host.c > +++ b/hw/cxl/cxl-host.c > @@ -104,7 +104,7 @@ void cxl_fmws_link_targets(Error **errp) > } > > static bool cxl_hdm_find_target(uint32_t *cache_mem, hwaddr addr, > - uint8_t *target) > + uint8_t *target, bool *interleaved) > { > int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO; > unsigned int hdm_count; > @@ -138,6 +138,11 @@ static bool cxl_hdm_find_target(uint32_t *cache_mem, > hwaddr addr, > found = true; > ig_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IG); > iw_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IW); > + > + if (interleaved) { > + *interleaved = iw_enc != 0; > + } > + > target_idx = (addr / cxl_decode_ig(ig_enc)) % (1 << iw_enc); > > if (target_idx < 4) { > @@ -157,7 +162,8 @@ static bool cxl_hdm_find_target(uint32_t *cache_mem, > hwaddr addr, > return found; [...] > + > +static int update_non_interleaved(Object *obj, void *opaque) > +{ > + const int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - > R_CXL_HDM_DECODER0_BASE_LO; > + bool commit = *(bool *)opaque; > + CXLType3Dev *ct3d; > + uint32_t *cache_mem; > + unsigned int hdm_count, i; > + uint32_t cap; > + uint64_t dpa_base = 0; > + > + if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) { > + return 0; > + } > + > + ct3d = CXL_TYPE3(obj); > + cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers; > + cap = ldl_le_p(cache_mem + R_CXL_HDM_DECODER_CAPABILITY); > + hdm_count = cxl_decoder_count_dec(FIELD_EX32(cap, > + CXL_HDM_DECODER_CAPABILITY, > + DECODER_COUNT)); > + /* > + * Walk the decoders and find any committed with iw set to 0 > + * (non interleaved). > + */ > + for (i = 0; i < hdm_count; i++) { > + uint64_t decoder_base, decoder_size, skip; > + uint32_t hdm_ctrl, low, high; > + int iw, committed; > + > + hdm_ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + i * > hdm_inc); > + committed = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED); > + if (commit ^ committed) { Is there any specific reason for using ^ here instead of '==' ? Thanks Zhijian
