On Fri, Mar 06, 2026 at 12:11:51PM +0000, Alireza Sanaee wrote:
> The CXL address to device decoding logic is complex because of the need to
> correctly decode fine grained interleave. The current implementation
> prevents use with KVM where executed instructions may reside in that memory
> and gives very slow performance even in TCG.
> 
> In many real cases non interleaved memory configurations are useful and for
> those we can use a more conventional memory region alias allowing similar
> performance to other memory in the system.
> 
> Whether this fast path is applicable can be established once the full set
> of HDM decoders has been committed (in whatever order the guest decides to
> commit them). As such a check is performed on each commit/uncommit of HDM
> decoder to establish if the alias should be added or removed.
> 
> Reviewed-by: Li Zhijian <[email protected]>
> Co-developed-by: Jonathan Cameron <[email protected]>
> Signed-off-by: Jonathan Cameron <[email protected]>
> Signed-off-by: Alireza Sanaee <[email protected]>

Reviewed-by: Gregory Price <[email protected]>
Tested-by: Gregory Price <[email protected]>

> ---
> Thanks to Gregory, and Li Zhijian for their feedback.
> v6->v7:
>           - Fixed a div by zero situation in the code for 
> interleaved_ways_dec func.
>           - Changed the signature of cfmws_update_non_interleaved function to 
> void.
> 
>  hw/cxl/cxl-component-utils.c |   6 ++
>  hw/cxl/cxl-host.c            | 197 +++++++++++++++++++++++++++++++++++
>  hw/mem/cxl_type3.c           |   4 +
>  include/hw/cxl/cxl.h         |   1 +
>  include/hw/cxl/cxl_device.h  |   4 +
>  5 files changed, 212 insertions(+)
> 
> diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
> index 07aabe331c..a624357978 100644
> --- a/hw/cxl/cxl-component-utils.c
> +++ b/hw/cxl/cxl-component-utils.c
> @@ -143,6 +143,12 @@ static void dumb_hdm_handler(CXLComponentState 
> *cxl_cstate, hwaddr offset,
>          value = FIELD_DP32(value, CXL_HDM_DECODER0_CTRL, COMMITTED, 0);
>      }
>      stl_le_p((uint8_t *)cache_mem + offset, value);
> +
> +    if (should_commit) {
> +        cfmws_update_non_interleaved(true);
> +    } else if (should_uncommit) {
> +        cfmws_update_non_interleaved(false);
> +    }
>  }
>  
>  static void bi_handler(CXLComponentState *cxl_cstate, hwaddr offset,
> diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c
> index 2dc9f77007..079b27133b 100644
> --- a/hw/cxl/cxl-host.c
> +++ b/hw/cxl/cxl-host.c
> @@ -264,6 +264,203 @@ static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow 
> *fw, hwaddr addr,
>      return d;
>  }
>  
> +typedef struct CXLDirectPTState {
> +    CXLType3Dev *ct3d;
> +    hwaddr decoder_base;
> +    hwaddr decoder_size;
> +    hwaddr dpa_base;
> +    unsigned int hdm_decoder_idx;
> +} CXLDirectPTState;
> +
> +static void cxl_fmws_direct_passthrough_setup(CXLDirectPTState *state,
> +                                              CXLFixedWindow *fw)
> +{
> +    CXLType3Dev *ct3d = state->ct3d;
> +    MemoryRegion *mr = NULL;
> +    uint64_t vmr_size = 0, pmr_size = 0, offset = 0;
> +    MemoryRegion *direct_mr;
> +    g_autofree char *direct_mr_name;
> +    unsigned int idx = state->hdm_decoder_idx;
> +
> +    if (ct3d->hostvmem) {
> +        MemoryRegion *vmr = host_memory_backend_get_memory(ct3d->hostvmem);
> +
> +        vmr_size = memory_region_size(vmr);
> +        if (state->dpa_base < vmr_size) {
> +            mr = vmr;
> +            offset = state->dpa_base;
> +        }
> +    }
> +    if (!mr && ct3d->hostpmem) {
> +        MemoryRegion *pmr = host_memory_backend_get_memory(ct3d->hostpmem);
> +
> +        pmr_size = memory_region_size(pmr);
> +        if (state->dpa_base - vmr_size < pmr_size) {
> +            mr = pmr;
> +            offset = state->dpa_base - vmr_size;
> +        }
> +    }
> +    if (!mr) {
> +        return;
> +    }
> +
> +    if (ct3d->direct_mr_fw[idx]) {
> +        return;
> +    }
> +
> +    direct_mr = &ct3d->direct_mr[idx];
> +    direct_mr_name = g_strdup_printf("cxl-direct-mapping-alias-%u", idx);
> +    if (!direct_mr_name) {
> +        return;
> +    }
> +
> +    memory_region_init_alias(direct_mr, OBJECT(ct3d), direct_mr_name, mr,
> +                             offset, state->decoder_size);
> +    memory_region_add_subregion(&fw->mr,
> +                                state->decoder_base - fw->base, direct_mr);
> +    ct3d->direct_mr_fw[idx] = fw;
> +}
> +
> +static void cxl_fmws_direct_passthrough_remove(CXLType3Dev *ct3d,
> +                                               uint64_t decoder_base,
> +                                               unsigned int idx)
> +{
> +    CXLFixedWindow *owner_fw = ct3d->direct_mr_fw[idx];
> +    MemoryRegion *direct_mr = &ct3d->direct_mr[idx];
> +
> +    if (!owner_fw) {
> +        return;
> +    }
> +
> +    if (!memory_region_is_mapped(direct_mr)) {
> +        return;
> +    }
> +
> +    if (cxl_cfmws_find_device(owner_fw, decoder_base, false)) {
> +        return;
> +    }
> +
> +    memory_region_del_subregion(&owner_fw->mr, direct_mr);
> +    object_unparent(OBJECT(direct_mr));
> +    ct3d->direct_mr_fw[idx] = NULL;
> +}
> +
> +static int cxl_fmws_direct_passthrough(Object *obj, void *opaque)
> +{
> +    CXLDirectPTState *state = opaque;
> +    CXLFixedWindow *fw;
> +
> +    if (!object_dynamic_cast(obj, TYPE_CXL_FMW)) {
> +        return 0;
> +    }
> +
> +    fw = CXL_FMW(obj);
> +
> +    /* Verify not interleaved */
> +    if (!cxl_cfmws_find_device(fw, state->decoder_base, false)) {
> +        return 0;
> +    }
> +
> +    cxl_fmws_direct_passthrough_setup(state, fw);
> +
> +    return 0;
> +}
> +
> +static int update_non_interleaved(Object *obj, void *opaque)
> +{
> +    const int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - 
> R_CXL_HDM_DECODER0_BASE_LO;
> +    bool commit = *(bool *)opaque;
> +    CXLType3Dev *ct3d;
> +    uint32_t *cache_mem;
> +    unsigned int hdm_count, i;
> +    int interleave_ways_dec;
> +    uint32_t cap;
> +    uint64_t dpa_base = 0;
> +
> +    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
> +        return 0;
> +    }
> +
> +    ct3d = CXL_TYPE3(obj);
> +    cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> +    cap = ldl_le_p(cache_mem + R_CXL_HDM_DECODER_CAPABILITY);
> +    hdm_count = cxl_decoder_count_dec(FIELD_EX32(cap,
> +                                                 CXL_HDM_DECODER_CAPABILITY,
> +                                                 DECODER_COUNT));
> +    for (i = 0; i < hdm_count; i++) {
> +        uint64_t decoder_base, decoder_size, skip;
> +        uint32_t hdm_ctrl, low, high;
> +        int iw, committed;
> +
> +        hdm_ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + i * 
> hdm_inc);
> +        committed = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED);
> +
> +        /*
> +         * Optimization: Looking for a fully committed path; if the type 3 
> HDM
> +         * decoder is not commmitted, it cannot lie on such a path.
> +         */
> +        if (commit && !committed) {
> +            return 0;
> +        }
> +
> +        low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_LO +
> +                       i * hdm_inc);
> +        high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_DPA_SKIP_HI +
> +                        i * hdm_inc);
> +        skip = ((uint64_t)high << 32) | (low & 0xf0000000);
> +        dpa_base += skip;
> +
> +        low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_LO + i * hdm_inc);
> +        high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_SIZE_HI + i * 
> hdm_inc);
> +        decoder_size = ((uint64_t)high << 32) | (low & 0xf0000000);
> +
> +        low = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_LO + i * hdm_inc);
> +        high = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_BASE_HI + i * 
> hdm_inc);
> +        decoder_base = ((uint64_t)high << 32) | (low & 0xf0000000);
> +
> +        iw = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, IW);
> +
> +        if (iw == 0) {
> +            if (!commit) {
> +                cxl_fmws_direct_passthrough_remove(ct3d, decoder_base, i);
> +            } else {
> +                CXLDirectPTState state = {
> +                    .ct3d = ct3d,
> +                    .decoder_base = decoder_base,
> +                    .decoder_size = decoder_size,
> +                    .dpa_base = dpa_base,
> +                    .hdm_decoder_idx = i,
> +                };
> +
> +                object_child_foreach_recursive(object_get_root(),
> +                                               cxl_fmws_direct_passthrough,
> +                                               &state);
> +            }
> +        }
> +
> +        interleave_ways_dec = cxl_interleave_ways_dec(iw, &error_fatal);
> +        if (interleave_ways_dec == 0) {
> +            return 0;
> +        }
> +
> +        dpa_base += decoder_size / interleave_ways_dec;
> +    }
> +
> +    return 0;
> +}
> +
> +void cfmws_update_non_interleaved(bool commit)
> +{
> +    /*
> +     * Walk endpoints to find both committed and uncommitted decoders,
> +     * then check if they are not interleaved (but the path is fully set up).
> +     */
> +    object_child_foreach_recursive(object_get_root(),
> +                                   update_non_interleaved, &commit);
> +
> +    return;
> +}
> +
>  static MemTxResult cxl_read_cfmws(void *opaque, hwaddr addr, uint64_t *data,
>                                    unsigned size, MemTxAttrs attrs)
>  {
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 4739239da3..d9fc0bec8f 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -427,6 +427,8 @@ static void hdm_decoder_commit(CXLType3Dev *ct3d, int 
> which)
>      ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
>  
>      stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl);
> +
> +    cfmws_update_non_interleaved(true);
>  }
>  
>  static void hdm_decoder_uncommit(CXLType3Dev *ct3d, int which)
> @@ -442,6 +444,8 @@ static void hdm_decoder_uncommit(CXLType3Dev *ct3d, int 
> which)
>      ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 0);
>  
>      stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + which * hdm_inc, ctrl);
> +
> +    cfmws_update_non_interleaved(false);
>  }
>  
>  static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err)
> diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
> index 998f495a98..d8cd8359d2 100644
> --- a/include/hw/cxl/cxl.h
> +++ b/include/hw/cxl/cxl.h
> @@ -71,4 +71,5 @@ CXLComponentState *cxl_usp_to_cstate(CXLUpstreamPort *usp);
>  typedef struct CXLDownstreamPort CXLDownstreamPort;
>  DECLARE_INSTANCE_CHECKER(CXLDownstreamPort, CXL_DSP, TYPE_CXL_DSP)
>  
> +void cfmws_update_non_interleaved(bool commit);
>  #endif
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 393f312217..ba551fa5f9 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -685,6 +685,8 @@ typedef struct CXLSetFeatureInfo {
>      size_t data_size;
>  } CXLSetFeatureInfo;
>  
> +typedef struct CXLFixedWindow CXLFixedWindow;
> +
>  struct CXLSanitizeInfo;
>  
>  typedef struct CXLAlertConfig {
> @@ -712,6 +714,8 @@ struct CXLType3Dev {
>      uint64_t sn;
>  
>      /* State */
> +    MemoryRegion direct_mr[CXL_HDM_DECODER_COUNT];
> +    CXLFixedWindow *direct_mr_fw[CXL_HDM_DECODER_COUNT];
>      AddressSpace hostvmem_as;
>      AddressSpace hostpmem_as;
>      CXLComponentState cxl_cstate;
> -- 
> 2.43.0
> 

Reply via email to