On 12/9/25 10:14 AM, Ben Cheatham wrote:
> Add the 'cxl-clear-error' command. This command allows the user to clear
> device poison from CXL memory devices.
>
> Signed-off-by: Ben Cheatham <[email protected]>
Reviewed-by: Dave Jiang <[email protected]>
> ---
> cxl/builtin.h | 1 +
> cxl/cxl.c | 1 +
> cxl/inject-error.c | 70 ++++++++++++++++++++++++++++++++++++++++++----
> 3 files changed, 67 insertions(+), 5 deletions(-)
>
> diff --git a/cxl/builtin.h b/cxl/builtin.h
> index e82fcb5..68ed1de 100644
> --- a/cxl/builtin.h
> +++ b/cxl/builtin.h
> @@ -26,6 +26,7 @@ int cmd_enable_region(int argc, const char **argv, struct cxl_ctx *ctx);
> int cmd_disable_region(int argc, const char **argv, struct cxl_ctx *ctx);
> int cmd_destroy_region(int argc, const char **argv, struct cxl_ctx *ctx);
> int cmd_inject_error(int argc, const char **argv, struct cxl_ctx *ctx);
> +int cmd_clear_error(int argc, const char **argv, struct cxl_ctx *ctx);
> #ifdef ENABLE_LIBTRACEFS
> int cmd_monitor(int argc, const char **argv, struct cxl_ctx *ctx);
> #else
> diff --git a/cxl/cxl.c b/cxl/cxl.c
> index a98bd6b..e1740b5 100644
> --- a/cxl/cxl.c
> +++ b/cxl/cxl.c
> @@ -81,6 +81,7 @@ static struct cmd_struct commands[] = {
> { "destroy-region", .c_fn = cmd_destroy_region },
> { "monitor", .c_fn = cmd_monitor },
> { "inject-error", .c_fn = cmd_inject_error },
> + { "clear-error", .c_fn = cmd_clear_error },
> };
>
> int main(int argc, const char **argv)
> diff --git a/cxl/inject-error.c b/cxl/inject-error.c
> index c0a9eeb..4ba3de0 100644
> --- a/cxl/inject-error.c
> +++ b/cxl/inject-error.c
> @@ -19,6 +19,10 @@ static struct inject_params {
> const char *address;
> } inj_param;
>
> +static struct clear_params {
> + const char *address;
> +} clear_param;
> +
> static const struct option inject_options[] = {
> OPT_STRING('t', "type", &inj_param.type, "Error type",
> "Error type to inject into <device>"),
> @@ -30,6 +34,15 @@ static const struct option inject_options[] = {
> OPT_END(),
> };
>
> +static const struct option clear_options[] = {
> + OPT_STRING('a', "address", &clear_param.address, "Address for poison clearing",
> + "Device physical address to clear poison from in hex or decimal"),
> +#ifdef ENABLE_DEBUG
> + OPT_BOOLEAN(0, "debug", &debug, "turn on debug output"),
> +#endif
> + OPT_END(),
> +};
> +
> static struct log_ctx iel;
>
> static struct cxl_protocol_error *find_cxl_proto_err(struct cxl_ctx *ctx,
> @@ -102,7 +115,7 @@ static int inject_proto_err(struct cxl_ctx *ctx, const char *devname,
> }
>
> static int poison_action(struct cxl_ctx *ctx, const char *filter,
> - const char *addr_str)
> + const char *addr_str, bool clear)
> {
> struct cxl_memdev *memdev;
> size_t addr;
> @@ -129,12 +142,18 @@ static int poison_action(struct cxl_ctx *ctx, const char *filter,
> return -EINVAL;
> }
>
> - rc = cxl_memdev_inject_poison(memdev, addr);
> + if (clear)
> + rc = cxl_memdev_clear_poison(memdev, addr);
> + else
> + rc = cxl_memdev_inject_poison(memdev, addr);
> +
> if (rc)
> - log_err(&iel, "failed to inject poison at %s:%s: %s\n",
> + log_err(&iel, "failed to %s %s:%s: %s\n",
> + clear ? "clear poison at" : "inject poison at",
> cxl_memdev_get_devname(memdev), addr_str,
> strerror(-rc));
> else
> - log_info(&iel, "poison injected at %s:%s\n",
> + log_info(&iel,
> + "poison %s at %s:%s\n", clear ? "cleared" : "injected",
> cxl_memdev_get_devname(memdev), addr_str);
>
> return rc;
> @@ -166,7 +185,7 @@ static int inject_action(int argc, const char **argv, struct cxl_ctx *ctx,
> }
>
> if (strcmp(inj_param.type, "poison") == 0) {
> - rc = poison_action(ctx, argv[0], inj_param.address);
> + rc = poison_action(ctx, argv[0], inj_param.address, false);
> return rc;
> }
>
> @@ -187,3 +206,44 @@ int cmd_inject_error(int argc, const char **argv, struct cxl_ctx *ctx)
>
> return rc ? EXIT_FAILURE : EXIT_SUCCESS;
> }
> +
> +static int clear_action(int argc, const char **argv, struct cxl_ctx *ctx,
> + const struct option *options, const char *usage)
> +{
> + const char * const u[] = {
> + usage,
> + NULL
> + };
> + int rc = -EINVAL;
> +
> + log_init(&iel, "cxl clear-error", "CXL_CLEAR_LOG");
> + argc = parse_options(argc, argv, options, u, 0);
> +
> + if (debug) {
> + cxl_set_log_priority(ctx, LOG_DEBUG);
> + iel.log_priority = LOG_DEBUG;
> + } else {
> + iel.log_priority = LOG_INFO;
> + }
> +
> + if (argc != 1) {
> + usage_with_options(u, options);
> + return rc;
> + }
> +
> + rc = poison_action(ctx, argv[0], clear_param.address, true);
> + if (rc) {
> + log_err(&iel, "Failed to clear poison from %s: %s\n",
> + argv[0], strerror(-rc));
> + return rc;
> + }
> +
> + return rc;
> +}
> +
> +int cmd_clear_error(int argc, const char **argv, struct cxl_ctx *ctx)
> +{
> + int rc = clear_action(argc, argv, ctx, clear_options,
> + "clear-error <device> [<options>]");
> + return rc ? EXIT_FAILURE : EXIT_SUCCESS;
> +}