Re: [PATCH v4 4/6] hw/cxl: QMP based poison injection support

2023-03-14 Thread Fan Ni
The 03/03/2023 15:09, Jonathan Cameron wrote:
> Inject poison using qmp command cxl-inject-poison to add an entry to the
> poison list.
> 
> For now, the poison is not returned CXL.mem reads, but only via the
> mailbox command Get Poison List.
> 
> See CXL rev 3.0, sec 8.2.9.8.4.1 Get Poison list (Opcode 4300h)
> 
> Kernel patches to use this interface here:
> https://lore.kernel.org/linux-cxl/cover.1665606782.git.alison.schofi...@intel.com/
> 
> To inject poison using qmp (telnet to the qmp port)
> { "execute": "qmp_capabilities" }
> 
> { "execute": "cxl-inject-poison",
> "arguments": {
>  "path": "/machine/peripheral/cxl-pmem0",
>  "start": 2048,
>  "length": 256
> }
> }
> 
> Adjusted to select a device on your machine.
> 
> Note that the poison list supported is kept short enough to avoid the
> complexity of state machine that is needed to handle the MORE flag.
> 
> Signed-off-by: Jonathan Cameron 
> 

Reviewed-by: Fan Ni 

> ---
> v4:
>  - Widen the mask on Poison source (lower bits of the address)
>to allow for Vendor Defined. Change will make it easier to potentially
>add a means to inject such poison in the future. Today it has no
>impact.
> ---
>  hw/cxl/cxl-mailbox-utils.c  | 90 +
>  hw/mem/cxl_type3.c  | 56 +++
>  hw/mem/cxl_type3_stubs.c|  6 +++
>  include/hw/cxl/cxl_device.h | 20 +
>  qapi/cxl.json   | 18 
>  5 files changed, 190 insertions(+)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index 702e16ca20..25933cf62c 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -62,6 +62,8 @@ enum {
>  #define GET_PARTITION_INFO 0x0
>  #define GET_LSA   0x2
>  #define SET_LSA   0x3
> +MEDIA_AND_POISON = 0x43,
> +#define GET_POISON_LIST0x0
>  };
>  
>  /* 8.2.8.4.5.1 Command Return Codes */
> @@ -295,6 +297,10 @@ static CXLRetCode cmd_identify_memory_device(struct 
> cxl_cmd *cmd,
>  stq_le_p(>persistent_capacity, cxl_dstate->pmem_size / 
> CXL_CAPACITY_MULTIPLIER);
>  stq_le_p(>volatile_capacity, cxl_dstate->vmem_size / 
> CXL_CAPACITY_MULTIPLIER);
>  stl_le_p(>lsa_size, cvc->get_lsa_size(ct3d));
> +/* 256 poison records */
> +st24_le_p(id->poison_list_max_mer, 256);
> +/* No limit - so limited by main poison record limit */
> +stw_le_p(>inject_poison_limit, 0);
>  
>  *len = sizeof(*id);
>  return CXL_MBOX_SUCCESS;
> @@ -384,6 +390,88 @@ static CXLRetCode cmd_ccls_set_lsa(struct cxl_cmd *cmd,
>  return CXL_MBOX_SUCCESS;
>  }
>  
> +/*
> + * This is very inefficient, but good enough for now!
> + * Also the payload will always fit, so no need to handle the MORE flag and
> + * make this stateful. We may want to allow longer poison lists to aid
> + * testing that kernel functionality.
> + */
> +static CXLRetCode cmd_media_get_poison_list(struct cxl_cmd *cmd,
> +CXLDeviceState *cxl_dstate,
> +uint16_t *len)
> +{
> +struct get_poison_list_pl {
> +uint64_t pa;
> +uint64_t length;
> +} QEMU_PACKED;
> +
> +struct get_poison_list_out_pl {
> +uint8_t flags;
> +uint8_t rsvd1;
> +uint64_t overflow_timestamp;
> +uint16_t count;
> +uint8_t rsvd2[0x14];
> +struct {
> +uint64_t addr;
> +uint32_t length;
> +uint32_t resv;
> +} QEMU_PACKED records[];
> +} QEMU_PACKED;
> +
> +struct get_poison_list_pl *in = (void *)cmd->payload;
> +struct get_poison_list_out_pl *out = (void *)cmd->payload;
> +CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
> +uint16_t record_count = 0, i = 0;
> +uint64_t query_start, query_length;
> +CXLPoisonList *poison_list = >poison_list;
> +CXLPoison *ent;
> +uint16_t out_pl_len;
> +
> +query_start = ldq_le_p(>pa);
> +/* 64 byte alignemnt required */
> +if (query_start & 0x3f) {
> +return CXL_MBOX_INVALID_INPUT;
> +}
> +query_length = ldq_le_p(>length) * 64;
> +
> +QLIST_FOREACH(ent, poison_list, node) {
> +/* Check for no overlap */
> +if (ent->start >= query_start + query_length ||
> +ent->start + ent->length <= query_start) {
> +continue;
> +}
> +record_count++;
> +}
> +out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
> +assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
> +
> +memset(out, 0, out_pl_len);
> +QLIST_FOREACH(ent, poison_list, node) {
> +uint64_t start, stop;
> +
> +/* Check for no overlap */
> +if (ent->start >= query_start + query_length ||
> +ent->start + ent->length <= query_start) {
> +continue;
> +}
> +
> +/* Deal with overlap */
> +  

Re: [PATCH v4 4/6] hw/cxl: QMP based poison injection support

2023-03-14 Thread Philippe Mathieu-Daudé

On 3/3/23 16:09, Jonathan Cameron wrote:

Inject poison using qmp command cxl-inject-poison to add an entry to the
poison list.

For now, the poison is not returned CXL.mem reads,


What do you mean?


but only via the
mailbox command Get Poison List.

See CXL rev 3.0, sec 8.2.9.8.4.1 Get Poison list (Opcode 4300h)

Kernel patches to use this interface here:
https://lore.kernel.org/linux-cxl/cover.1665606782.git.alison.schofi...@intel.com/

To inject poison using qmp (telnet to the qmp port)
{ "execute": "qmp_capabilities" }

{ "execute": "cxl-inject-poison",
 "arguments": {
  "path": "/machine/peripheral/cxl-pmem0",
  "start": 2048,
  "length": 256
 }
}

Adjusted to select a device on your machine.

Note that the poison list supported is kept short enough to avoid the
complexity of state machine that is needed to handle the MORE flag.

Signed-off-by: Jonathan Cameron 

---
v4:
  - Widen the mask on Poison source (lower bits of the address)
to allow for Vendor Defined. Change will make it easier to potentially
add a means to inject such poison in the future. Today it has no
impact.
---
  hw/cxl/cxl-mailbox-utils.c  | 90 +
  hw/mem/cxl_type3.c  | 56 +++
  hw/mem/cxl_type3_stubs.c|  6 +++
  include/hw/cxl/cxl_device.h | 20 +
  qapi/cxl.json   | 18 
  5 files changed, 190 insertions(+)




+/*
+ * This is very inefficient, but good enough for now!
+ * Also the payload will always fit, so no need to handle the MORE flag and
+ * make this stateful. We may want to allow longer poison lists to aid
+ * testing that kernel functionality.
+ */
+static CXLRetCode cmd_media_get_poison_list(struct cxl_cmd *cmd,
+CXLDeviceState *cxl_dstate,
+uint16_t *len)
+{
+struct get_poison_list_pl {
+uint64_t pa;
+uint64_t length;
+} QEMU_PACKED;
+
+struct get_poison_list_out_pl {
+uint8_t flags;
+uint8_t rsvd1;
+uint64_t overflow_timestamp;
+uint16_t count;
+uint8_t rsvd2[0x14];
+struct {
+uint64_t addr;
+uint32_t length;
+uint32_t resv;
+} QEMU_PACKED records[];
+} QEMU_PACKED;
+
+struct get_poison_list_pl *in = (void *)cmd->payload;
+struct get_poison_list_out_pl *out = (void *)cmd->payload;
+CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate);
+uint16_t record_count = 0, i = 0;
+uint64_t query_start, query_length;
+CXLPoisonList *poison_list = >poison_list;
+CXLPoison *ent;
+uint16_t out_pl_len;
+
+query_start = ldq_le_p(>pa);
+/* 64 byte alignemnt required */
+if (query_start & 0x3f) {
+return CXL_MBOX_INVALID_INPUT;
+}
+query_length = ldq_le_p(>length) * 64;
+
+QLIST_FOREACH(ent, poison_list, node) {
+/* Check for no overlap */
+if (ent->start >= query_start + query_length ||
+ent->start + ent->length <= query_start) {
+continue;
+}
+record_count++;
+}
+out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+memset(out, 0, out_pl_len);
+QLIST_FOREACH(ent, poison_list, node) {
+uint64_t start, stop;
+
+/* Check for no overlap */
+if (ent->start >= query_start + query_length ||
+ent->start + ent->length <= query_start) {
+continue;
+}
+
+/* Deal with overlap */
+start = MAX(ent->start & 0xffc0, query_start);
+stop = MIN((ent->start & 0xffc0) + ent->length,


~63ull or ROUND_DOWN(, 64ull) could be easier to read.


+   query_start + query_length);
+stq_le_p(>records[i].addr, start | (ent->type & 0x7));
+stl_le_p(>records[i].length, (stop - start) / 64);
+i++;
+}
+if (ct3d->poison_list_overflowed) {
+out->flags = (1 << 1);
+stq_le_p(>overflow_timestamp, ct3d->poison_list_overflow_ts);
+}
+stw_le_p(>count, record_count);
+*len = out_pl_len;
+return CXL_MBOX_SUCCESS;
+}
+




diff --git a/qapi/cxl.json b/qapi/cxl.json
index 4be7d46041..9ebd680dfe 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl.json
@@ -5,6 +5,24 @@
  # = CXL devices
  ##
  
+##

+# @cxl-inject-poison:
+#
+# Poison records indicate that a CXL memory device knows that a particular
+# memory region may be corrupted. This may be because of locally detected
+# errors (e.g. ECC failure) or poisoned writes received from other components
+# in the system. This injection mechanism enables testing of the OS handling
+# of poison records which may be queried via the CXL mailbox.
+#
+# @path: CXL type 3 device canonical QOM path
+# @start: Start address - must be 64 byte aligned.
+# @length: Length of poison to inject - must be a 

Re: [PATCH v4 4/6] hw/cxl: QMP based poison injection support

2023-03-03 Thread Ira Weiny
Jonathan Cameron wrote:
> Inject poison using qmp command cxl-inject-poison to add an entry to the
> poison list.
> 
> For now, the poison is not returned CXL.mem reads, but only via the
> mailbox command Get Poison List.
> 
> See CXL rev 3.0, sec 8.2.9.8.4.1 Get Poison list (Opcode 4300h)
> 
> Kernel patches to use this interface here:
> https://lore.kernel.org/linux-cxl/cover.1665606782.git.alison.schofi...@intel.com/
> 
> To inject poison using qmp (telnet to the qmp port)
> { "execute": "qmp_capabilities" }
> 
> { "execute": "cxl-inject-poison",
> "arguments": {
>  "path": "/machine/peripheral/cxl-pmem0",
>  "start": 2048,
>  "length": 256
> }
> }
> 
> Adjusted to select a device on your machine.
> 
> Note that the poison list supported is kept short enough to avoid the
> complexity of state machine that is needed to handle the MORE flag.
> 

Reviewed-by: Ira Weiny 

> Signed-off-by: Jonathan Cameron 
> 
> ---
> v4:
>  - Widen the mask on Poison source (lower bits of the address)
>to allow for Vendor Defined. Change will make it easier to potentially
>add a means to inject such poison in the future. Today it has no
>impact.

[...]