On Wed, Jan 09, 2013 at 07:42:52AM +0000, Liu, Jinsong wrote:
> This patch implements real Xen acpi memory hotplug driver as module.
> When loaded, it replaces Xen stub driver.
> 
> When an acpi memory device hotadd event occurs, it notifies OS and
> invokes notification callback, adding related memory device and parsing
> memory information, finally hypercall to xen hypervisor to add memory.
> 
> Signed-off-by: Liu Jinsong <jinsong....@intel.com>
> ---
>  drivers/xen/Kconfig               |   11 +
>  drivers/xen/Makefile              |    1 +
>  drivers/xen/xen-acpi-memhotplug.c |  487 
> +++++++++++++++++++++++++++++++++++++
>  include/xen/interface/platform.h  |   13 +-
>  4 files changed, 508 insertions(+), 4 deletions(-)
>  create mode 100644 drivers/xen/xen-acpi-memhotplug.c
> 
> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> index 2986de9..b8cf899 100644
> --- a/drivers/xen/Kconfig
> +++ b/drivers/xen/Kconfig
> @@ -191,6 +191,17 @@ config XEN_STUB
>  
>         To enable Xen features like cpu and memory hotplug, select Y here.
>  
> +config XEN_ACPI_HOTPLUG_MEMORY
> +     tristate "Xen ACPI memory hotplug"
> +     depends on XEN_STUB && ACPI
> +     default n
> +     help
> +       This is Xen ACPI memory hotplug.
> +
> +       Currently Xen only support ACPI memory hot-add. If you want
> +       to hot-add memory at runtime (the hot-added memory cannot be
> +       removed until machine stop), select Y/M here, otherwise select N.
> +
>  config XEN_ACPI_PROCESSOR
>       tristate "Xen ACPI processor"
>       depends on XEN && X86 && ACPI_PROCESSOR && CPU_FREQ
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index b63edd8..1605f59 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -31,6 +31,7 @@ obj-$(CONFIG_XEN_MCE_LOG)           += mcelog.o
>  obj-$(CONFIG_XEN_PCIDEV_BACKEND)     += xen-pciback/
>  obj-$(CONFIG_XEN_PRIVCMD)            += xen-privcmd.o
>  obj-$(CONFIG_XEN_STUB)                       += xen-stub.o
> +obj-$(CONFIG_XEN_ACPI_HOTPLUG_MEMORY)        += xen-acpi-memhotplug.o
>  obj-$(CONFIG_XEN_ACPI_PROCESSOR)     += xen-acpi-processor.o
>  xen-evtchn-y                         := evtchn.o
>  xen-gntdev-y                         := gntdev.o
> diff --git a/drivers/xen/xen-acpi-memhotplug.c 
> b/drivers/xen/xen-acpi-memhotplug.c
> new file mode 100644
> index 0000000..d207fec
> --- /dev/null
> +++ b/drivers/xen/xen-acpi-memhotplug.c
> @@ -0,0 +1,487 @@
> +/*
> + * Copyright (C) 2012 Intel Corporation
> + *    Author: Liu Jinsong <jinsong....@intel.com>
> + *    Author: Jiang Yunhong <yunhong.ji...@intel.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or (at
> + * your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
> + * NON INFRINGEMENT.  See the GNU General Public License for more
> + * details.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/types.h>
> +#include <linux/acpi.h>
> +#include <acpi/acpi_drivers.h>
> +#include <xen/acpi.h>
> +#include <xen/interface/platform.h>
> +#include <asm/xen/hypercall.h>
> +
> +#define PREFIX "ACPI:xen_memory_hotplug:"
> +
> +struct acpi_memory_info {
> +     struct list_head list;
> +     u64 start_addr;         /* Memory Range start physical addr */
> +     u64 length;             /* Memory Range length */
> +     unsigned short caching; /* memory cache attribute */
> +     unsigned short write_protect;   /* memory read/write attribute */
> +                             /* copied from buffer getting from _CRS */
> +     unsigned int enabled:1;
> +};
> +
> +struct acpi_memory_device {
> +     struct acpi_device *device;
> +     struct list_head res_list;
> +};
> +
> +static bool acpi_hotmem_initialized __read_mostly;
> +
> +static int xen_hotadd_memory(int pxm, struct acpi_memory_info *info)
> +{
> +     struct xen_platform_op op;
> +
> +     op.cmd = XENPF_mem_hotadd;
> +     op.u.mem_add.spfn = info->start_addr >> PAGE_SHIFT;
> +     op.u.mem_add.epfn = (info->start_addr + info->length) >> PAGE_SHIFT;
> +     op.u.mem_add.pxm = pxm;
> +
> +     return HYPERVISOR_dom0_op(&op);

Don't you want to print out the error if it failed?

Say do:
        int rc;

        ..
        rc = HYPERVISOR_dom0_op(&op);

        if (rc)
                pr_err(PREFIX "Hotplug Memory Add failed on %lx->%lx, _PXM: %d, error: %d\n",
                        ...

?
> +}
> +
> +static int xen_acpi_get_pxm(acpi_handle h)
> +{
> +     unsigned long long pxm;
> +     acpi_status status;
> +     acpi_handle handle;
> +     acpi_handle phandle = h;
> +
> +     do {
> +             handle = phandle;
> +             status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
> +             if (ACPI_SUCCESS(status))
> +                     return pxm;
> +             status = acpi_get_parent(handle, &phandle);
> +     } while (ACPI_SUCCESS(status));
> +     return -1;

Ugh. Why not a normal -Exxx type error? Say -ENXIO?

> +}
> +
> +static int xen_acpi_memory_enable_device(struct acpi_memory_device 
> *mem_device)
> +{
> +     int pxm, result;
> +     int num_enabled = 0;
> +     struct acpi_memory_info *info;
> +
> +     if (!mem_device)
> +             return -EINVAL;
> +
> +     pxm = xen_acpi_get_pxm(mem_device->device->handle);
> +     if (pxm < 0)
> +             return -EINVAL;
> +
> +     list_for_each_entry(info, &mem_device->res_list, list) {
> +             if (info->enabled) { /* just sanity check...*/
> +                     num_enabled++;
> +                     continue;
> +             }
> +
> +             if (!info->length)
> +                     continue;
> +
> +             result = xen_hotadd_memory(pxm, info);
> +             if (result)
> +                     continue;
> +             info->enabled = 1;
> +             num_enabled++;
> +     }
> +
> +     if (!num_enabled)
> +             return -EINVAL;

Is that the correct error to be returned? I thought
-ENODEV would be more appropriate?


> +
> +     return 0;
> +}
> +
> +static acpi_status
> +acpi_memory_get_resource(struct acpi_resource *resource, void *context)
> +{
> +     struct acpi_memory_device *mem_device = context;
> +     struct acpi_resource_address64 address64;
> +     struct acpi_memory_info *info, *new;
> +     acpi_status status;
> +
> +     status = acpi_resource_to_address64(resource, &address64);
> +     if (ACPI_FAILURE(status) ||
> +         (address64.resource_type != ACPI_MEMORY_RANGE))
> +             return AE_OK;
> +
> +     list_for_each_entry(info, &mem_device->res_list, list) {
> +             if ((info->caching == address64.info.mem.caching) &&
> +                 (info->write_protect == address64.info.mem.write_protect) &&
> +                 (info->start_addr + info->length == address64.minimum)) {
> +                     info->length += address64.address_length;
> +                     return AE_OK;
> +             }
> +     }
> +
> +     new = kzalloc(sizeof(struct acpi_memory_info), GFP_KERNEL);
> +     if (!new)
> +             return AE_ERROR;
> +
> +     INIT_LIST_HEAD(&new->list);
> +     new->caching = address64.info.mem.caching;
> +     new->write_protect = address64.info.mem.write_protect;
> +     new->start_addr = address64.minimum;
> +     new->length = address64.address_length;
> +     list_add_tail(&new->list, &mem_device->res_list);
> +
> +     return AE_OK;
> +}
> +
> +static int
> +acpi_memory_get_device_resources(struct acpi_memory_device *mem_device)
> +{
> +     acpi_status status;
> +     struct acpi_memory_info *info, *n;
> +
> +     if (!list_empty(&mem_device->res_list))
> +             return 0;
> +
> +     status = acpi_walk_resources(mem_device->device->handle,
> +             METHOD_NAME__CRS, acpi_memory_get_resource, mem_device);
> +
> +     if (ACPI_FAILURE(status)) {
> +             list_for_each_entry_safe(info, n, &mem_device->res_list, list)
> +                     kfree(info);
> +             INIT_LIST_HEAD(&mem_device->res_list);
> +             return -EINVAL;
> +     }
> +
> +     return 0;
> +}
> +
> +static int
> +acpi_memory_get_device(acpi_handle handle,
> +                    struct acpi_memory_device **mem_device)
> +{
> +     acpi_status status;
> +     acpi_handle phandle;
> +     struct acpi_device *device = NULL;
> +     struct acpi_device *pdevice = NULL;
> +     int result;
> +
> +     if (!acpi_bus_get_device(handle, &device) && device)
> +             goto end;
> +
> +     status = acpi_get_parent(handle, &phandle);
> +     if (ACPI_FAILURE(status)) {
> +             pr_warn(PREFIX "Cannot find acpi parent\n");
> +             return -EINVAL;
> +     }
> +
> +     /* Get the parent device */
> +     result = acpi_bus_get_device(phandle, &pdevice);
> +     if (result) {
> +             pr_warn(PREFIX "Cannot get acpi bus device\n");
> +             return -EINVAL;
> +     }
> +
> +     /*
> +      * Now add the notified device.  This creates the acpi_device
> +      * and invokes .add function
> +      */
> +     result = acpi_bus_add(&device, pdevice, handle, ACPI_BUS_TYPE_DEVICE);
> +     if (result) {
> +             pr_warn(PREFIX "Cannot add acpi bus\n");
> +             return -EINVAL;
> +     }
> +
> +end:
> +     *mem_device = acpi_driver_data(device);
> +     if (!(*mem_device)) {
> +             pr_err(PREFIX "Driver data not found\n");
> +             return -ENODEV;
> +     }
> +
> +     return 0;
> +}
> +
> +static int acpi_memory_check_device(struct acpi_memory_device *mem_device)
> +{
> +     unsigned long long current_status;
> +
> +     /* Get device present/absent information from the _STA */
> +     if (ACPI_FAILURE(acpi_evaluate_integer(mem_device->device->handle,
> +                             "_STA", NULL, &current_status)))
> +             return -ENODEV;
> +     /*
> +      * Check for device status. Device should be
> +      * present/enabled/functioning.
> +      */
> +     if (!((current_status & ACPI_STA_DEVICE_PRESENT)
> +           && (current_status & ACPI_STA_DEVICE_ENABLED)
> +           && (current_status & ACPI_STA_DEVICE_FUNCTIONING)))
> +             return -ENODEV;
> +
> +     return 0;
> +}
> +
> +static int acpi_memory_disable_device(struct acpi_memory_device *mem_device)
> +{
> +     pr_warn(PREFIX "Xen does not support memory hotremove\n");

So is this going to show up in dmesg if the user writes '0' to the
sysfs attribute?

Hmm, perhaps that should be pr_debug as the -ENOSYS is enough to tell
the user that we don't support it.

> +
> +     return -ENOSYS;
> +}
> +
> +static void acpi_memory_device_notify(acpi_handle handle, u32 event, void 
> *data)
> +{
> +     struct acpi_memory_device *mem_device;
> +     struct acpi_device *device;
> +     u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; /* default */
> +
> +     switch (event) {
> +     case ACPI_NOTIFY_BUS_CHECK:
> +             ACPI_DEBUG_PRINT((ACPI_DB_INFO,
> +                     "\nReceived BUS CHECK notification for device\n"));
> +             /* Fall Through */
> +     case ACPI_NOTIFY_DEVICE_CHECK:
> +             if (event == ACPI_NOTIFY_DEVICE_CHECK)
> +                     ACPI_DEBUG_PRINT((ACPI_DB_INFO,
> +                     "\nReceived DEVICE CHECK notification for device\n"));
> +
> +             if (acpi_memory_get_device(handle, &mem_device)) {
> +                     pr_err(PREFIX "Cannot find driver data\n");
> +                     break;
> +             }
> +
> +             ost_code = ACPI_OST_SC_SUCCESS;
> +             break;
> +
> +     case ACPI_NOTIFY_EJECT_REQUEST:
> +             ACPI_DEBUG_PRINT((ACPI_DB_INFO,
> +                     "\nReceived EJECT REQUEST notification for device\n"));
> +
> +             if (acpi_bus_get_device(handle, &device)) {
> +                     pr_err(PREFIX "Device doesn't exist\n");
> +                     break;
> +             }
> +             mem_device = acpi_driver_data(device);
> +             if (!mem_device) {
> +                     pr_err(PREFIX "Driver Data is NULL\n");
> +                     break;
> +             }
> +
> +             /*
> +              * TBD: implement acpi_memory_disable_device and invoke
> +              * acpi_bus_remove if Xen support hotremove in the future
> +              */
> +             acpi_memory_disable_device(mem_device);
> +             break;
> +
> +     default:
> +             ACPI_DEBUG_PRINT((ACPI_DB_INFO,
> +                               "Unsupported event [0x%x]\n", event));
> +             /* non-hotplug event; possibly handled by other handler */
> +             return;
> +     }
> +
> +     (void) acpi_evaluate_hotplug_ost(handle, event, ost_code, NULL);
> +     return;
> +}
> +
> +static int xen_acpi_memory_device_add(struct acpi_device *device)
> +{
> +     int result;
> +     struct acpi_memory_device *mem_device = NULL;
> +
> +
> +     if (!device)
> +             return -EINVAL;
> +
> +     mem_device = kzalloc(sizeof(struct acpi_memory_device), GFP_KERNEL);
> +     if (!mem_device)
> +             return -ENOMEM;
> +
> +     INIT_LIST_HEAD(&mem_device->res_list);
> +     mem_device->device = device;
> +     sprintf(acpi_device_name(device), "%s", ACPI_MEMORY_DEVICE_NAME);
> +     sprintf(acpi_device_class(device), "%s", ACPI_MEMORY_DEVICE_CLASS);
> +     device->driver_data = mem_device;
> +
> +     /* Get the range from the _CRS */
> +     result = acpi_memory_get_device_resources(mem_device);
> +     if (result) {
> +             kfree(mem_device);
> +             return result;
> +     }
> +
> +     /*
> +      * Early boot code has recognized memory area by EFI/E820.
> +      * If DSDT shows these memory devices on boot, hotplug is not necessary
> +      * for them. So, it just returns until completion of this driver's
> +      * start up.

"So it just returns until completion of this driver's start up."

Can you change that to be:
"Return OK until this driver starts up."

But then... how can this function be called with acpi_hotmem_initialized=false?
Is it because of the acpi_walk_namespace call? How about you state that:

"This can be done via the acpi_walk_namespace which is called during
early boot and acpi_hotmem_initialized is set _after_ that call
has completed."




> +      */
> +     if (!acpi_hotmem_initialized)
> +             return 0;
> +
> +     if (!acpi_memory_check_device(mem_device))
> +             result = xen_acpi_memory_enable_device(mem_device);
> +
> +     return result;
> +}
> +
> +static int xen_acpi_memory_device_remove(struct acpi_device *device, int 
> type)
> +{
> +     struct acpi_memory_device *mem_device = NULL;
> +
> +     if (!device || !acpi_driver_data(device))
> +             return -EINVAL;
> +
> +     mem_device = acpi_driver_data(device);
> +     kfree(mem_device);
> +
> +     return 0;
> +}
> +
> +/*
> + * Helper function to check for memory device
> + */
> +static acpi_status is_memory_device(acpi_handle handle)
> +{
> +     char *hardware_id;
> +     acpi_status status;
> +     struct acpi_device_info *info;
> +
> +     status = acpi_get_object_info(handle, &info);
> +     if (ACPI_FAILURE(status))
> +             return status;
> +
> +     if (!(info->valid & ACPI_VALID_HID)) {
> +             kfree(info);
> +             return AE_ERROR;
> +     }
> +
> +     hardware_id = info->hardware_id.string;
> +     if ((hardware_id == NULL) ||
> +         (strcmp(hardware_id, ACPI_MEMORY_DEVICE_HID)))
> +             status = AE_ERROR;
> +
> +     kfree(info);
> +     return status;
> +}
> +
> +static acpi_status
> +acpi_memory_register_notify_handler(acpi_handle handle,
> +                                 u32 level, void *ctxt, void **retv)
> +{
> +     acpi_status status;
> +
> +     status = is_memory_device(handle);
> +     if (ACPI_FAILURE(status))
> +             return AE_OK;   /* continue */
> +
> +     status = acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY,
> +                                          acpi_memory_device_notify, NULL);
> +     /* continue */
> +     return AE_OK;
> +}
> +
> +static acpi_status
> +acpi_memory_deregister_notify_handler(acpi_handle handle,
> +                                   u32 level, void *ctxt, void **retv)
> +{
> +     acpi_status status;
> +
> +     status = is_memory_device(handle);
> +     if (ACPI_FAILURE(status))
> +             return AE_OK;   /* continue */
> +
> +     status = acpi_remove_notify_handler(handle,
> +                                         ACPI_SYSTEM_NOTIFY,
> +                                         acpi_memory_device_notify);
> +
> +     return AE_OK;   /* continue */
> +}
> +
> +static const struct acpi_device_id memory_device_ids[] = {
> +     {ACPI_MEMORY_DEVICE_HID, 0},
> +     {"", 0},
> +};
> +MODULE_DEVICE_TABLE(acpi, memory_device_ids);
> +
> +static struct acpi_driver xen_acpi_memory_device_driver = {
> +     .name = "acpi_memhotplug",
> +     .class = ACPI_MEMORY_DEVICE_CLASS,
> +     .ids = memory_device_ids,
> +     .ops = {
> +             .add = xen_acpi_memory_device_add,
> +             .remove = xen_acpi_memory_device_remove,
> +             },
> +};
> +
> +static int __init xen_acpi_memory_device_init(void)
> +{
> +     int result;
> +     acpi_status status;
> +
> +     if (!xen_initial_domain())
> +             return -ENODEV;
> +
> +     /* unregister the stub which only used to reserve driver space */
> +     acpi_bus_unregister_driver(&xen_stub_memory_device_driver);
> +
> +     result = acpi_bus_register_driver(&xen_acpi_memory_device_driver);
> +     if (result < 0)
> +             return -ENODEV;

Shouldn't we then try to re-register the stub driver?

> +
> +     status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
> +                                  ACPI_UINT32_MAX,
> +                                  acpi_memory_register_notify_handler,
> +                                  NULL, NULL, NULL);
> +
> +     if (ACPI_FAILURE(status)) {
> +             pr_warn(PREFIX "walk_namespace failed\n");
> +             acpi_bus_unregister_driver(&xen_acpi_memory_device_driver);

Ditto here.

> +             return -ENODEV;
> +     }
> +
> +     acpi_hotmem_initialized = 1;

s/1/true/

> +     return 0;
> +}
> +
> +static void __exit xen_acpi_memory_device_exit(void)
> +{
> +     acpi_status status;
> +
> +     if (!xen_initial_domain())
> +             return;
> +
> +     status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
> +                                  ACPI_UINT32_MAX,
> +                                  acpi_memory_deregister_notify_handler,
> +                                  NULL, NULL, NULL);
> +     if (ACPI_FAILURE(status))
> +             pr_warn(PREFIX "walk_namespace failed\n");
> +
> +     acpi_bus_unregister_driver(&xen_acpi_memory_device_driver);
> +
> +     /*
> +      * stub reserve space again to prevent any chance of native
> +      * driver loading, though not much meaning in real life

not much meaning in real life? What does that mean?

> +      */
> +     acpi_bus_register_driver(&xen_stub_memory_device_driver);
> +     return;
> +}
> +
> +module_init(xen_acpi_memory_device_init);
> +module_exit(xen_acpi_memory_device_exit);
> +ACPI_MODULE_NAME("xen-acpi-memhotplug");
> +MODULE_AUTHOR("Liu Jinsong <jinsong....@intel.com>");
> +MODULE_DESCRIPTION("Xen Hotplug Mem Driver");
> +MODULE_LICENSE("GPL");
> diff --git a/include/xen/interface/platform.h 
> b/include/xen/interface/platform.h
> index 5e36932..2c4fb4b 100644
> --- a/include/xen/interface/platform.h
> +++ b/include/xen/interface/platform.h
> @@ -324,10 +324,14 @@ struct xenpf_cpu_ol {
>  };
>  DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol);
>  
> -/*
> - * CMD 58 and 59 are reserved for cpu hotadd and memory hotadd,
> - * which are already occupied at Xen hypervisor side.
> - */
> +#define XENPF_mem_hotadd     59
> +struct xenpf_mem_hotadd {
> +     uint64_t spfn;
> +     uint64_t epfn;
> +     uint32_t pxm;
> +     uint32_t flags;
> +};
> +
>  #define XENPF_core_parking     60
>  struct xenpf_core_parking {
>       /* IN variables */
> @@ -357,6 +361,7 @@ struct xen_platform_op {
>               struct xenpf_set_processor_pminfo set_pminfo;
>               struct xenpf_pcpuinfo          pcpu_info;
>               struct xenpf_cpu_ol            cpu_ol;
> +             struct xenpf_mem_hotadd        mem_add;
>               struct xenpf_core_parking      core_parking;
>               uint8_t                        pad[128];
>       } u;
> -- 
> 1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to