On 04/02/2026 23:00, Alireza Sanaee wrote:
> The CXL address to device decoding logic is complex because of the need
> to correctly decode fine grained interleave. The current implementation
> prevents use with KVM where executed instructions may reside in that
> memory and gives very slow performance even in TCG.


Do you mean KVM will work after this patch in 1-way case


> 
> In many real cases non interleaved memory configurations are useful and
> for those we can use a more conventional memory region alias allowing
> similar performance to other memory in the system.


Do you have any benchmark data to demonstrate the performance improvement?



> 
> Co-developed-by: Jonathan Cameron <[email protected]>
> Signed-off-by: Jonathan Cameron <[email protected]>
> Signed-off-by: Alireza Sanaee <[email protected]>
> ---
>   hw/cxl/cxl-component-utils.c |   8 ++
>   hw/cxl/cxl-host.c            | 197 ++++++++++++++++++++++++++++++++++-
>   hw/mem/cxl_type3.c           |   4 +
>   include/hw/cxl/cxl.h         |   1 +
>   include/hw/cxl/cxl_device.h  |   1 +
>   5 files changed, 206 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
> index d36162e91b..d459d04b6d 100644
> --- a/hw/cxl/cxl-component-utils.c
> +++ b/hw/cxl/cxl-component-utils.c
> @@ -142,6 +142,14 @@ static void dumb_hdm_handler(CXLComponentState 
> *cxl_cstate, hwaddr offset,
>           value = FIELD_DP32(value, CXL_HDM_DECODER0_CTRL, COMMITTED, 0);
>       }
>       stl_le_p((uint8_t *)cache_mem + offset, value);
> +
> +    if (should_commit) {
> +        cfmws_update_non_interleaved(true);
> +    }
> +
> +    if (should_uncommit) {
> +        cfmws_update_non_interleaved(false);
> +    }


This might be slightly better as an if-else block, since should_commit and 
should_uncommit are mutually exclusive:


if (should_commit) {
        cfmws_update_non_interleaved(true);;
} else if (should_uncommit) {
        cfmws_update_non_interleaved(false);
}


>   }
>   
>   static void bi_handler(CXLComponentState *cxl_cstate, hwaddr offset,
> diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c
> index 9633b01abf..1fcfe01164 100644
> --- a/hw/cxl/cxl-host.c
> +++ b/hw/cxl/cxl-host.c
> @@ -104,7 +104,7 @@ void cxl_fmws_link_targets(Error **errp)
>   }
>   
>   static bool cxl_hdm_find_target(uint32_t *cache_mem, hwaddr addr,
> -                                uint8_t *target)
> +                                uint8_t *target, bool *interleaved)
>   {
>       int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
>       unsigned int hdm_count;
> @@ -138,6 +138,11 @@ static bool cxl_hdm_find_target(uint32_t *cache_mem, 
> hwaddr addr,
>           found = true;
>           ig_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IG);
>           iw_enc = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IW);
> +
> +        if (interleaved) {
> +            *interleaved = iw_enc != 0;
> +        }
> +
>           target_idx = (addr / cxl_decode_ig(ig_enc)) % (1 << iw_enc);
>   
>           if (target_idx < 4) {
> @@ -157,7 +162,8 @@ static bool cxl_hdm_find_target(uint32_t *cache_mem, 
> hwaddr addr,
>       return found;

[...]

> +
> +static int update_non_interleaved(Object *obj, void *opaque)
> +{
> +    const int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - 
> R_CXL_HDM_DECODER0_BASE_LO;
> +    bool commit = *(bool *)opaque;
> +    CXLType3Dev *ct3d;
> +    uint32_t *cache_mem;
> +    unsigned int hdm_count, i;
> +    uint32_t cap;
> +    uint64_t dpa_base = 0;
> +
> +    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
> +        return 0;
> +    }
> +
> +    ct3d = CXL_TYPE3(obj);
> +    cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
> +    cap = ldl_le_p(cache_mem + R_CXL_HDM_DECODER_CAPABILITY);
> +    hdm_count = cxl_decoder_count_dec(FIELD_EX32(cap,
> +                                                 CXL_HDM_DECODER_CAPABILITY,
> +                                                 DECODER_COUNT));
> +    /*
> +     * Walk the decoders and find any committed with iw set to 0
> +     * (non interleaved).
> +     */
> +    for (i = 0; i < hdm_count; i++) {
> +        uint64_t decoder_base, decoder_size, skip;
> +        uint32_t hdm_ctrl, low, high;
> +        int iw, committed;
> +
> +        hdm_ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL + i * 
> hdm_inc);
> +        committed = FIELD_EX32(hdm_ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED);
> +        if (commit ^ committed) {


Is there any specific reason for using ^ here instead of '==' ?



Thanks
Zhijian

Reply via email to