On Mon, 2014-11-17 at 15:54 -0600, Nathan Fontenot wrote:
> Move handling of memory hotplug add on pseries completely into the kernel.
> 
> The current memory hotplug add path involves the drmgr command doing part
> of this work in userspace and requesting the kernel to do additional pieces.
> This patch allows us to handle the act completely in the kernel via rtas
> hotplug events. This allows us to perform the operation faster and provide
> a common memory hotplug add path for PowerVM and PowerKVM systems.
> 
> The patch does introduce a static rtas_hp_event variable that is set to
> true when updating the device tree during memory hotplug initiated from
> a rtas hotplug event. This is needed because we do not need to do the
> work in the of notifier, this work is already performed in handling the
> hotplug request. At a later time we can remove this when we deprecate the
> previous method of memory hotplug.
> 
> Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
> ---
>  arch/powerpc/platforms/pseries/hotplug-memory.c |  244 
> +++++++++++++++++++++++
>  1 file changed, 243 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index 69d178b..b57d42b 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -16,6 +16,7 @@
>  #include <linux/memblock.h>
>  #include <linux/memory.h>
>  #include <linux/memory_hotplug.h>
> +#include <linux/slab.h>
>  
>  #include <asm/firmware.h>
>  #include <asm/machdep.h>
> @@ -23,6 +24,8 @@
>  #include <asm/sparsemem.h>
>  #include "pseries.h"
>  
> +static bool rtas_hp_event;
> +
>  unsigned long pseries_memory_block_size(void)
>  {
>       struct device_node *np;
> @@ -66,6 +69,52 @@ unsigned long pseries_memory_block_size(void)
>       return memblock_size;
>  }
>  
> +static void dlpar_free_drconf_property(struct property *prop)
> +{
> +     kfree(prop->name);
> +     kfree(prop->value);
> +     kfree(prop);
> +}
> +
> +static struct property *dlpar_clone_drconf_property(struct device_node *dn)
> +{
> +     struct property *prop, *new_prop;
> +
> +     prop = of_find_property(dn, "ibm,dynamic-memory", NULL);
> +     if (!prop)
> +             return NULL;
> +
> +     new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
> +     if (!new_prop)
> +             return NULL;
> +
> +     new_prop->name = kstrdup(prop->name, GFP_KERNEL);
> +     new_prop->value = kmalloc(prop->length, GFP_KERNEL);
> +     if (!new_prop->name || !new_prop->value) {
> +             dlpar_free_drconf_property(new_prop);
> +             return NULL;
> +     }
> +
> +     memcpy(new_prop->value, prop->value, prop->length);
> +     new_prop->length = prop->length;
> +
> +     return new_prop;
> +}
> +
> +static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
> +{
> +     unsigned long section_nr;
> +     struct mem_section *mem_sect;
> +     struct memory_block *mem_block;
> +     u64 phys_addr = be64_to_cpu(lmb->base_addr);
> +
> +     section_nr = pfn_to_section_nr(PFN_DOWN(phys_addr));
> +     mem_sect = __nr_to_section(section_nr);
> +
> +     mem_block = find_memory_block(mem_sect);
> +     return mem_block;
> +}
> +
>  #ifdef CONFIG_MEMORY_HOTREMOVE
>  static int pseries_remove_memblock(unsigned long base, unsigned int 
> memblock_size)
>  {
> @@ -136,19 +185,209 @@ static inline int pseries_remove_mem_node(struct 
> device_node *np)
>  }
>  #endif /* CONFIG_MEMORY_HOTREMOVE */
>  
> +static int dlpar_add_lmb(struct of_drconf_cell *lmb)
> +{
> +     struct memory_block *mem_block;
> +     u64 phys_addr;
> +     uint32_t drc_index;
I started commenting this and it turns out you've used uint32_t almost
everywhere for values you've pulled from the device tree or the elog.
These should be u32.
> +     unsigned long pages_per_block;
> +     unsigned long block_sz;
> +     int nid, sections_per_block;
> +     int rc;
> +
> +     if (be32_to_cpu(lmb->flags) & DRCONF_MEM_ASSIGNED)
> +             return -EINVAL;
> +
> +     phys_addr = be64_to_cpu(lmb->base_addr);
> +     drc_index = be32_to_cpu(lmb->drc_index);
> +     block_sz = memory_block_size_bytes();
> +     sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
> +     pages_per_block = PAGES_PER_SECTION * sections_per_block;
> +
Perhaps I'm being a bit slow here but it isn't exactly clear what you're
getting with all those variables and could you explain what that
statement below is checking for?

> +     if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
> +             return -EINVAL;
> +
> +     rc = dlpar_acquire_drc(drc_index);
> +     if (rc)
> +             return rc;
> +
> +     /* Find the node id for this address */
> +     nid = memory_add_physaddr_to_nid(phys_addr);
> +
> +     /* Add the memory */
> +     rc = add_memory(nid, phys_addr, block_sz);
> +     if (rc) {
> +             dlpar_release_drc(drc_index);
> +             return rc;
> +     }
> +
> +     /* Register this block of memory */
> +     rc = memblock_add(phys_addr, block_sz);
> +     if (rc) {
> +             remove_memory(nid, phys_addr, block_sz);
> +             dlpar_release_drc(drc_index);
> +             return rc;
> +     }
> +
> +     mem_block = lmb_to_memblock(lmb);
> +     if (!mem_block) {
> +             remove_memory(nid, phys_addr, block_sz);
> +             dlpar_release_drc(drc_index);
> +             return -EINVAL;
> +     }
> +
> +     rc = device_online(&mem_block->dev);
> +     put_device(&mem_block->dev);
> +     if (rc) {
> +             remove_memory(nid, phys_addr, block_sz);
> +             dlpar_release_drc(drc_index);
> +             return rc;
> +     }
> +
> +     lmb->flags |= cpu_to_be32(DRCONF_MEM_ASSIGNED);
> +     return 0;
> +}
> +
> +static int dlpar_memory_add_by_count(struct pseries_hp_errorlog *hp_elog,
> +                                  struct property *prop)
> +{
> +     struct of_drconf_cell *lmbs;
> +     uint32_t num_lmbs;
> +     __be32 *p;
> +     int i, lmbs_to_add;
> +     int lmbs_available = 0;
> +     int lmbs_added = 0;
> +     int rc;
> +
> +     lmbs_to_add = be32_to_cpu(hp_elog->_drc_u.drc_count);
Didn't you already do the endian conversion back in
handle_dlpar_errorlog?
> +     pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add);
> +
> +     if (lmbs_to_add == 0)
> +             return -EINVAL;
> +
> +     p = prop->value;
> +     num_lmbs = be32_to_cpu(*p++);
> +     lmbs = (struct of_drconf_cell *)p;
> +
> +     /* Validate that there are enough LMBs to satisfy the request */
> +     for (i = 0; i < num_lmbs; i++) {
> +             if (!(be32_to_cpu(lmbs[i].flags) & DRCONF_MEM_ASSIGNED))
> +                     lmbs_available++;
> +     }
> +
> +     if (lmbs_available < lmbs_to_add)
> +             return -EINVAL;
> +
I'm wondering why the next 3 lines when the if could be incorporated
into the for condition
for (i = 0; i < num_lmbs && lmbs_to_add != lmbs_added; i++) {
(or lmbs_to_add < lmbs_added)...
> +     for (i = 0; i < num_lmbs; i++) {
> +             if (lmbs_to_add == lmbs_added)
> +                     break;
> +
> +             rc = dlpar_add_lmb(&lmbs[i]);
> +             if (rc)
> +                     continue;
> +
> +             lmbs_added++;
> +             pr_info("Memory at %llx (drc index %x) has been hot-added\n",
> +                     be64_to_cpu(lmbs[i].base_addr),
> +                     be32_to_cpu(lmbs[i].drc_index));
This message feels a tad premature, you're going to roll back if the
requested amount couldn't be added which is good but then there's going
to be a confusing mix of 'hot-added' followed by 'hot-removed'. Could
this message be moved to when you clear the reserved fields?
> +
> +             /* Mark this lmb so we can remove it later if all of the
> +              * requested LMBs cannot be added.
> +              */
> +             lmbs[i].reserved = 1;
Writing 1 like that into a __be variable will upset sparse. 

As a more general comment that it might be worth not passing around __be
structures and convert them all into CPU endian in one place so that all
these functions can do away with the endian conversion calls.
> +     }
> +
> +     if (lmbs_added != lmbs_to_add) {
> +             /* TODO: remove added lmbs */
> +             rc = -EINVAL;
> +     }
> +
> +     /* Clear the reserved fields */
> +     for (i = 0; i < num_lmbs; i++)
> +             lmbs[i].reserved = 0;
> +
> +     return rc;
> +}
> +
> +static int dlpar_memory_add_by_index(struct pseries_hp_errorlog *hp_elog,
> +                                  struct property *prop)
> +{
> +     struct of_drconf_cell *lmbs;
> +     uint32_t num_lmbs, drc_index;
> +     __be32 *p;
> +     int i, lmb_found;
> +     int rc;
> +
> +     drc_index = be32_to_cpu(hp_elog->_drc_u.drc_index);
Didn't you already do the endian conversion back in
handle_dlpar_errorlog?
> +     pr_info("Attempting to hot-add LMB, drc index %x\n", drc_index);
> +
> +     p = prop->value;
> +     num_lmbs = be32_to_cpu(*p++);
> +     lmbs = (struct of_drconf_cell *)p;
> +
> +     lmb_found = 0;
> +     for (i = 0; i < num_lmbs; i++) {
> +             if (lmbs[i].drc_index == hp_elog->_drc_u.drc_index) {
Probably wanted to use your local variable drc_index here also
lmbs[i].drc_index is __be and I don't think you've converted it but the
other side of the condition has been...
> +                     lmb_found = 1;
> +                     rc = dlpar_add_lmb(&lmbs[i]);
> +                     break;
> +             }
> +     }
> +
> +     if (!lmb_found)
> +             rc = -EINVAL;
> +
> +     if (rc)
> +             pr_info("Failed to hot-add memory, drc index %x\n", drc_index);
> +     else
> +             pr_info("Memory at %llx (drc index %x) has been hot-added\n",
> +                     be64_to_cpu(lmbs[i].base_addr), drc_index);
> +
> +     return rc;
> +}
> +
>  int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
>  {
> -     int rc = 0;
> +     struct device_node *dn;
> +     struct property *prop;
> +     int rc;
>  
>       lock_device_hotplug();
>  
> +     dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
> +     if (!dn)
> +             return -EINVAL;
> +
> +     prop = dlpar_clone_drconf_property(dn);
> +     if (!prop) {
> +             of_node_put(dn);
> +             return -EINVAL;
> +     }
> +
>       switch (hp_elog->action) {
> +     case PSERIES_HP_ELOG_ACTION_ADD:
> +             if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
> +                     rc = dlpar_memory_add_by_count(hp_elog, prop);
> +             else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
> +                     rc = dlpar_memory_add_by_index(hp_elog, prop);
> +             else
> +                     rc = -EINVAL;
> +             break;
>       default:
>               pr_err("Invalid action (%d) specified\n", hp_elog->action);
>               rc = -EINVAL;
>               break;
>       }
>  
> +     if (rc)
> +             dlpar_free_drconf_property(prop);
> +     else {
> +             rtas_hp_event = true;
> +             of_update_property(dn, prop);
> +             rtas_hp_event = false;
> +     }
> +
> +     of_node_put(dn);
>       unlock_device_hotplug();
>       return rc;
>  }
> @@ -193,6 +432,9 @@ static int pseries_update_drconf_memory(struct 
> of_prop_reconfig *pr)
>       __be32 *p;
>       int i, rc = -EINVAL;
>  
> +     if (rtas_hp_event)
> +             return 0;
> +
>       memblock_size = pseries_memory_block_size();
>       if (!memblock_size)
>               return -EINVAL;
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev



_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to