On Mon, 29 Sep 2025 20:21:52 -0700
Davidlohr Bueso <[email protected]> wrote:

> Add basic plumbing for memory expander devices that support Back
> Invalidation. This introduces a 'hdm-db=on|off' parameter and
> exposes the relevant BI RT/Decoder component cachemem registers.
> 
> Some noteworthy properties:
>  - Devices require enabling Flit mode.
>  - Explicit BI-ID commit is required.
>  - HDM decoder support both host and dev coherency models.
> 
> Signed-off-by: Davidlohr Bueso <[email protected]>
Hi Davidlohr,

Comments inline mostly focus on the bi parameter.  I think flipping
it to true for components where we are hard coding it as true will
move that logic decision up a layer and make the code easier to follow.

Thanks,

Jonathan

> diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
> index a43d227336ca..2098e9999a88 100644
> --- a/hw/cxl/cxl-component-utils.c
> +++ b/hw/cxl/cxl-component-utils.c

> @@ -235,7 +309,7 @@ static void ras_init_common(uint32_t *reg_state, uint32_t 
> *write_msk)
>  }
>  
>  static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
> -                            enum reg_type type)
> +                            enum reg_type type, bool bi)
>  {
>      int decoder_count = CXL_HDM_DECODER_COUNT;
>      int hdm_inc = R_CXL_HDM_DECODER1_BASE_LO - R_CXL_HDM_DECODER0_BASE_LO;
> @@ -260,7 +334,9 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t 
> *write_msk,
>                       UIO_DECODER_COUNT, 0);
>      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, MEMDATA_NXM_CAP, 
> 0);
>      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY,
> -                     SUPPORTED_COHERENCY_MODEL, 0); /* Unknown */
> +                     SUPPORTED_COHERENCY_MODEL,
> +                     /* host+dev or Unknown */
> +                     type == CXL2_TYPE3_DEVICE && bi ? 3 : 0);
>      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_GLOBAL_CONTROL,
>                       HDM_DECODER_ENABLE, 0);
>      write_msk[R_CXL_HDM_DECODER_GLOBAL_CONTROL] = 0x3;
> @@ -271,8 +347,7 @@ static void hdm_init_common(uint32_t *reg_state, uint32_t 
> *write_msk,
>          write_msk[R_CXL_HDM_DECODER0_SIZE_HI + i * hdm_inc] = 0xffffffff;
>          write_msk[R_CXL_HDM_DECODER0_CTRL + i * hdm_inc] = 0x13ff;
>          if (type == CXL2_DEVICE ||
> -            type == CXL2_TYPE3_DEVICE ||
> -            type == CXL2_LOGICAL_DEVICE) {
> +            type == CXL2_TYPE3_DEVICE || type == CXL2_LOGICAL_DEVICE) {

Unrelated change? Or am I missing something real here?

>              write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_LO + i * hdm_inc] =
>                  0xf0000000;
>          } else {
> @@ -283,9 +358,43 @@ static void hdm_init_common(uint32_t *reg_state, 
> uint32_t *write_msk,


>  void cxl_component_register_init_common(uint32_t *reg_state,
>                                          uint32_t *write_msk,
> -                                        enum reg_type type)
> +                                        enum reg_type type,
> +                                        bool bi)

I wonder if we shouldn't set bi for all 3 component types that actually
have the BI related capability?   For Type3 we keep it controllable
and for DSP and RP hard code the parameter to true.

>  {
>      int caps = 0;
>  
> @@ -325,7 +434,7 @@ void cxl_component_register_init_common(uint32_t 
> *reg_state,
>      case CXL2_LOGICAL_DEVICE:
>          /* + HDM */
>          init_cap_reg(HDM, 5, 1);
> -        hdm_init_common(reg_state, write_msk, type);
> +        hdm_init_common(reg_state, write_msk, type, bi);
>          /* fallthrough */
>      case CXL2_DOWNSTREAM_PORT:
>      case CXL2_DEVICE:
> @@ -340,6 +449,26 @@ void cxl_component_register_init_common(uint32_t 
> *reg_state,
>          abort();
>      }
>  
> +    /* back invalidate */

With bi set true in cases where there is anything to do here, could wrap
this in an if (bi)

> +    switch (type) {
> +    case CXL2_UPSTREAM_PORT:
> +        init_cap_reg(BI_RT, 11, CXL_BI_RT_CAP_VERSION);
> +        bi_rt_init_common(reg_state, write_msk);
> +        break;
> +    case CXL2_ROOT_PORT:
> +    case CXL2_DOWNSTREAM_PORT:
> +    case CXL2_TYPE3_DEVICE:
> +        if (type == CXL2_TYPE3_DEVICE && !bi)  {

With the values for the other types tweaked this check becomes unnecessary

> +            break;
> +        }
> +
> +        init_cap_reg(BI_DECODER, 12, CXL_BI_DECODER_CAP_VERSION);
> +        bi_decoder_init_common(reg_state, write_msk, type);
> +        break;
> +    default:
> +        break;
> +    }
> +
>      ARRAY_FIELD_DP32(reg_state, CXL_CAPABILITY_HEADER, ARRAY_SIZE, caps);
>  #undef init_cap_reg
>  }

>  
>  static uint64_t get_lsa_size(CXLType3Dev *ct3d)
> diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
> index f8d64263ac08..e0593e783803 100644
> --- a/hw/pci-bridge/cxl_downstream.c
> +++ b/hw/pci-bridge/cxl_downstream.c
> @@ -42,7 +42,7 @@ static void latch_registers(CXLDownstreamPort *dsp)
>      uint32_t *write_msk = dsp->cxl_cstate.crb.cache_mem_regs_write_mask;
>  
>      cxl_component_register_init_common(reg_state, write_msk,
> -                                       CXL2_DOWNSTREAM_PORT);
> +                                       CXL2_DOWNSTREAM_PORT, false);
This false briefly confused me and is the reason for comment above.
DSPs and RPs have BI support and it's odd to set a parameter here called
bi to false, only to enable it always.

>  }
>  
>  /* TODO: Look at sharing this code across all CXL port types */
> diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c
> index f3472f081707..1c0087d3f111 100644
> --- a/hw/pci-bridge/cxl_root_port.c
> +++ b/hw/pci-bridge/cxl_root_port.c
> @@ -106,7 +106,8 @@ static void latch_registers(CXLRootPort *crp)
>      uint32_t *reg_state = crp->cxl_cstate.crb.cache_mem_registers;
>      uint32_t *write_msk = crp->cxl_cstate.crb.cache_mem_regs_write_mask;
>  
> -    cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT);
> +    cxl_component_register_init_common(reg_state, write_msk,
> +                                       CXL2_ROOT_PORT, false);

Same here.  Also I think under 80 chars with CXL2_ROOT_PORT, on first line


>  }
>  
>  static void build_dvsecs(PCIDevice *d, CXLComponentState *cxl)
> diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c
> index e5a0d1fb308c..4bc185df8c87 100644
> --- a/hw/pci-bridge/cxl_upstream.c
> +++ b/hw/pci-bridge/cxl_upstream.c
> @@ -136,7 +136,7 @@ static void latch_registers(CXLUpstreamPort *usp)
>      uint32_t *write_msk = usp->cxl_cstate.crb.cache_mem_regs_write_mask;
>  
>      cxl_component_register_init_common(reg_state, write_msk,
> -                                       CXL2_UPSTREAM_PORT);
> +                                       CXL2_UPSTREAM_PORT, false);

Obviously different structure but still seems odd that bi is false yet we create
BI_RT structure in this call.

>      ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8);
>  }

> diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
> index cd92cb02532a..0ff9f5b0fddf 100644
> --- a/include/hw/cxl/cxl_component.h
> +++ b/include/hw/cxl/cxl_component.h
> @@ -67,6 +67,8 @@ CXLx_CAPABILITY_HEADER(LINK, 2)
>  CXLx_CAPABILITY_HEADER(HDM, 3)
>  CXLx_CAPABILITY_HEADER(EXTSEC, 4)
>  CXLx_CAPABILITY_HEADER(SNOOP, 5)
> +CXLx_CAPABILITY_HEADER(BI_RT, 6)
> +CXLx_CAPABILITY_HEADER(BI_DECODER, 7)
>  
>  /*
>   * Capability structures contain the actual registers that the CXL component
> @@ -211,10 +213,55 @@ HDM_DECODER_INIT(3);
>      (CXL_IDE_REGISTERS_OFFSET + CXL_IDE_REGISTERS_SIZE)
>  #define CXL_SNOOP_REGISTERS_SIZE   0x8
>  
> -QEMU_BUILD_BUG_MSG((CXL_SNOOP_REGISTERS_OFFSET +
> -                    CXL_SNOOP_REGISTERS_SIZE) >= 0x1000,
> +#define CXL_BI_RT_CAP_VERSION 1
> +#define CXL_BI_RT_REGISTERS_OFFSET \
> +    (CXL_SNOOP_REGISTERS_OFFSET + CXL_SNOOP_REGISTERS_SIZE)
> +#define CXL_BI_RT_REGISTERS_SIZE   0xC
> +
> +REG32(CXL_BI_RT_CAPABILITY, CXL_BI_RT_REGISTERS_OFFSET)
> +    FIELD(CXL_BI_RT_CAPABILITY, EXPLICIT_COMMIT, 0, 1)
> +REG32(CXL_BI_RT_CTRL, CXL_BI_RT_REGISTERS_OFFSET + 0x4)
> +    FIELD(CXL_BI_RT_CTRL, COMMIT, 0, 1)
> +REG32(CXL_BI_RT_STATUS, CXL_BI_RT_REGISTERS_OFFSET + 0x8)
> +    FIELD(CXL_BI_RT_STATUS, COMMITTED, 0, 1)
> +    FIELD(CXL_BI_RT_STATUS, ERR_NOT_COMMITTED, 1, 1)
> +    FIELD(CXL_BI_RT_STATUS, COMMIT_TMO_SCALE, 8, 4)
> +    FIELD(CXL_BI_RT_STATUS, COMMIT_TMO_BASE, 12, 4)
> +
> +/* CXL r3.2 8.2.4.27 - CXL BI Decoder Capability Structure */
> +#define CXL_BI_DECODER_CAP_VERSION 1
> +#define CXL_BI_DECODER_REGISTERS_OFFSET \
> +    (CXL_BI_RT_REGISTERS_OFFSET + CXL_BI_RT_REGISTERS_SIZE)
> +#define CXL_BI_DECODER_REGISTERS_SIZE   0xC
> +
> +REG32(CXL_BI_DECODER_CAPABILITY, CXL_BI_DECODER_REGISTERS_OFFSET)
> +    FIELD(CXL_BI_DECODER_CAPABILITY, HDM_D, 0, 1)
> +    FIELD(CXL_BI_DECODER_CAPABILITY, EXPLICIT_COMMIT, 1, 1)
> +REG32(CXL_BI_DECODER_CTRL, CXL_BI_DECODER_REGISTERS_OFFSET + 0x4)
> +    FIELD(CXL_BI_DECODER_CTRL, BI_FW, 0, 1)
> +    FIELD(CXL_BI_DECODER_CTRL, BI_ENABLE, 1, 1)
> +    FIELD(CXL_BI_DECODER_CTRL, COMMIT, 2, 1)
> +REG32(CXL_BI_DECODER_STATUS, CXL_BI_DECODER_REGISTERS_OFFSET + 0x8)
> +    FIELD(CXL_BI_DECODER_STATUS, COMMITTED, 0, 1)
> +    FIELD(CXL_BI_DECODER_STATUS, ERR_NOT_COMMITTED, 1, 1)
> +    FIELD(CXL_BI_DECODER_STATUS, COMMIT_TMO_SCALE, 8, 4)
> +    FIELD(CXL_BI_DECODER_STATUS, COMMIT_TMO_BASE, 12, 4)
> +
> +QEMU_BUILD_BUG_MSG((CXL_BI_DECODER_REGISTERS_OFFSET +
> +                    CXL_BI_DECODER_REGISTERS_SIZE) >= 0x1000,
>                     "No space for registers");
>  
> +/* track BI explicit commit handling for route table and decoder */
> +enum {
> +    CXL_BISTATE_RT = 0,
> +    CXL_BISTATE_DECODER,
> +    CXL_BISTATE_MAX
> +};
> +
> +typedef struct bi_state {
> +    uint64_t last_commit;  /* last 0->1 transition */
> +} BIState;
> +
>  typedef struct component_registers {
>      /*
>       * Main memory region to be registered with QEMU core.
> @@ -260,6 +307,7 @@ typedef struct cxl_component {
>  
>      CDATObject cdat;
>      CXLCompObject compliance;
> +    BIState bi_state[CXL_BISTATE_MAX];
>  } CXLComponentState;
>  
>  void cxl_component_register_block_init(Object *obj,
> @@ -267,7 +315,7 @@ void cxl_component_register_block_init(Object *obj,
>                                         const char *type);
>  void cxl_component_register_init_common(uint32_t *reg_state,
>                                          uint32_t *write_msk,
> -                                        enum reg_type type);
> +                                        enum reg_type type, bool bi);
>  
>  void cxl_component_create_dvsec(CXLComponentState *cxl_cstate,
>                                  enum reg_type cxl_dev_type, uint16_t length,
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 0abfd678b875..75603b8180b5 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -841,6 +841,9 @@ struct CXLType3Dev {
>      CXLMemSparingReadAttrs rank_sparing_attrs;
>      CXLMemSparingWriteAttrs rank_sparing_wr_attrs;
>  
> +    /* BI flows */
> +    bool hdmdb;
> +
>      struct dynamic_capacity {
>          HostMemoryBackend *host_dc;
>          AddressSpace host_dc_as;


Reply via email to