On Tue, Oct 31, 2023 at 02:20:45PM -0700, Dave Jiang wrote:
> The current operation for disable-region does not check if the memory
> covered by a region is online before attempting to disable the cxl region.
> Have the tool attempt to offline the relevant memory before attempting to
> disable the region(s). If offline fails, stop and return error.
> 
> Provide a -f option for the region to continue disable the region even if
> the memory is not offlined. Add a warning to state that the physical
> memory is being leaked and unrecoverable unless reboot due to disable without
> offline.
> 
> Signed-off-by: Dave Jiang <[email protected]>

Reviewed-by: Fan Ni <[email protected]>

> 
> ---
> v3:
> - Remove movable check. (Dan)
> - Attempt to offline if not offline. -f will disable region anyways even
>   if memory not offline. (Dan)
> v2:
> - Update documentation and help output. (Vishal)
> ---
>  Documentation/cxl/cxl-disable-region.txt |   10 ++++++
>  cxl/region.c                             |   54 
> +++++++++++++++++++++++++++++-
>  2 files changed, 63 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/cxl/cxl-disable-region.txt 
> b/Documentation/cxl/cxl-disable-region.txt
> index 4b0625e40bf6..9abf19e96094 100644
> --- a/Documentation/cxl/cxl-disable-region.txt
> +++ b/Documentation/cxl/cxl-disable-region.txt
> @@ -14,6 +14,10 @@ SYNOPSIS
>  
>  include::region-description.txt[]
>  
> +If there are memory blocks that are still online, the operation will attempt 
> to
> +offline the relevant blocks. If the offlining fails, the operation fails 
> when not
> +using the -f (force) parameter.
> +
>  EXAMPLE
>  -------
>  ----
> @@ -27,6 +31,12 @@ OPTIONS
>  -------
>  include::bus-option.txt[]
>  
> +-f::
> +--force::
> +     Attempt to disable-region even though memory cannot be offlined 
> successfully.
> +     Will emit warning that operation will permanently leak phiscal address 
> space
> +     and cannot be recovered until a reboot.
> +
>  include::decoder-option.txt[]
>  
>  include::debug-option.txt[]
> diff --git a/cxl/region.c b/cxl/region.c
> index bcd703956207..5cbbf2749e2d 100644
> --- a/cxl/region.c
> +++ b/cxl/region.c
> @@ -14,6 +14,7 @@
>  #include <util/parse-options.h>
>  #include <ccan/minmax/minmax.h>
>  #include <ccan/short_types/short_types.h>
> +#include <daxctl/libdaxctl.h>
>  
>  #include "filter.h"
>  #include "json.h"
> @@ -95,6 +96,8 @@ static const struct option enable_options[] = {
>  
>  static const struct option disable_options[] = {
>       BASE_OPTIONS(),
> +     OPT_BOOLEAN('f', "force", &param.force,
> +                 "attempt to offline memory before disabling the region"),
>       OPT_END(),
>  };
>  
> @@ -789,13 +792,62 @@ static int destroy_region(struct cxl_region *region)
>       return cxl_region_delete(region);
>  }
>  
> +static int disable_region(struct cxl_region *region)
> +{
> +     const char *devname = cxl_region_get_devname(region);
> +     struct daxctl_region *dax_region;
> +     struct daxctl_memory *mem;
> +     struct daxctl_dev *dev;
> +     int failed = 0, rc;
> +
> +     dax_region = cxl_region_get_daxctl_region(region);
> +     if (!dax_region)
> +             goto out;
> +
> +     daxctl_dev_foreach(dax_region, dev) {
> +             mem = daxctl_dev_get_memory(dev);
> +             if (!mem)
> +                     return -ENXIO;
> +
> +             /*
> +              * If memory is still online and user wants to force it, attempt
> +              * to offline it.
> +              */
> +             if (daxctl_memory_is_online(mem)) {
> +                     rc = daxctl_memory_offline(mem);
> +                     if (rc < 0) {
> +                             log_err(&rl, "%s: unable to offline %s: %s\n",
> +                                     devname,
> +                                     daxctl_dev_get_devname(dev),
> +                                     strerror(abs(rc)));
> +                             if (!param.force)
> +                                     return rc;
> +
> +                             failed++;
> +                     }
> +             }
> +     }
> +
> +     if (failed) {
> +             log_err(&rl, "%s: Forcing region disable without successful 
> offline.\n",
> +                     devname);
> +             log_err(&rl, "%s: Physical address space has now been 
> permanently leaked.\n",
> +                     devname);
> +             log_err(&rl, "%s: Leaked address cannot be recovered until a 
> reboot.\n",
> +                     devname);
> +     }
> +
> +out:
> +     return cxl_region_disable(region);
> +}
> +
>  static int do_region_xable(struct cxl_region *region, enum region_actions 
> action)
>  {
>       switch (action) {
>       case ACTION_ENABLE:
>               return cxl_region_enable(region);
>       case ACTION_DISABLE:
> -             return cxl_region_disable(region);
> +             return disable_region(region);
>       case ACTION_DESTROY:
>               return destroy_region(region);
>       default:
> 
> 

Reply via email to