On Mon, 2014-11-24 at 09:03 -0600, Nathan Fontenot wrote: > > On 11/21/2014 01:49 AM, Cyril Bur wrote: > > > > On Mon, 2014-11-17 at 15:56 -0600, Nathan Fontenot wrote: > >> Move handling of memory hotplug remove on pseries completely into the > >> kernel. > >> > >> The current memory hotplug remove path involves the drmgr command doing > >> part > >> of this work in userspace and requesting the kernel to do additional > >> pieces. > >> This patch allows us to handle the act completely in the kernel via rtas > >> hotplug events. This allows us to perform the operation faster and provide > >> a common memory hotplug remove path for PowerVM and PowerKVM systems. > >> > >> Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com> > >> --- > >> arch/powerpc/platforms/pseries/hotplug-memory.c | 206 > >> ++++++++++++++++++++++- > >> 1 file changed, 201 insertions(+), 5 deletions(-) > >> > >> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c > >> b/arch/powerpc/platforms/pseries/hotplug-memory.c > >> index b57d42b..c8189e8 100644 > >> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c > >> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c > >> @@ -173,6 +173,179 @@ static int pseries_remove_mem_node(struct > >> device_node *np) > >> pseries_remove_memblock(base, lmb_size); > >> return 0; > >> } > >> + > >> +static int lmb_is_removable(struct of_drconf_cell *lmb) > >> +{ > >> + int i, scns_per_block; > >> + int rc = 1; > >> + unsigned long pfn, block_sz; > >> + u64 phys_addr; > >> + > >> + if (!(be32_to_cpu(lmb->flags) & DRCONF_MEM_ASSIGNED)) > >> + return -1; > > This makes me kind of nervous. You're using the return value of > > lmb_is_removable as a boolean but it returns three possible values -1,0 > > and 1. Functionally it looks correct to me so its not a massive issue > > Oh yuck. this should be a boolean return. I'll fix that. > > >> + > >> + phys_addr = be64_to_cpu(lmb->base_addr); > >> + block_sz = memory_block_size_bytes(); > >> + scns_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; > >> + > >> + for (i = 0; i < scns_per_block; i++) { > >> + pfn = PFN_DOWN(phys_addr); > >> + if (!pfn_present(pfn)) > >> + continue; > >> + > >> + rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION); > >> + phys_addr += MIN_MEMORY_BLOCK_SIZE; > >> + } > >> + > >> + return rc; > >> +} > >> + > >> +static int dlpar_add_lmb(struct of_drconf_cell *); > >> + > >> +static int dlpar_remove_lmb(struct of_drconf_cell *lmb) > >> +{ > >> + struct memory_block *mem_block; > >> + unsigned long block_sz; > >> + u64 phys_addr; > >> + uint32_t drc_index; > >> + int nid, rc; > >> + > >> + if (!lmb_is_removable(lmb)) > >> + return -EINVAL; > >> + > >> + phys_addr = be64_to_cpu(lmb->base_addr); > >> + drc_index = be32_to_cpu(lmb->drc_index); > >> + > >> + mem_block = lmb_to_memblock(lmb); > >> + if (!mem_block) > >> + return -EINVAL; > >> + > >> + rc = device_offline(&mem_block->dev); > >> + put_device(&mem_block->dev); > >> + if (rc) > >> + return rc; > >> + > >> + block_sz = pseries_memory_block_size(); > >> + nid = memory_add_physaddr_to_nid(phys_addr); > >> + > >> + remove_memory(nid, phys_addr, block_sz); > >> + > >> + /* Update memory regions for memory remove */ > >> + memblock_remove(phys_addr, block_sz); > >> + > >> + dlpar_release_drc(drc_index); > >> + > >> + lmb->flags &= cpu_to_be32(~DRCONF_MEM_ASSIGNED); > >> + pr_info("Memory at %llx (drc index %x) has been hot-removed\n", > >> + be64_to_cpu(lmb->base_addr), drc_index); > > dlpar_add_lmb doesn't print anything but dlpar_remove_lmb does? Related > > to my comment about printing a 'hot-add' messages prematurely, perhaps > > move this to the callers > > Yes, I'll update the remove mem messages also. > > >> + > >> + return 0; > >> +} > >> + > >> +static int dlpar_memory_remove_by_count(struct pseries_hp_errorlog > >> *hp_elog, > >> + struct property *prop) > >> +{ > >> + struct of_drconf_cell *lmbs; > >> + int lmbs_to_remove, lmbs_removed = 0; > >> + int lmbs_available = 0; > >> + uint32_t num_lmbs; > >> + __be32 *p; > >> + int i, rc; > >> + > >> + lmbs_to_remove = be32_to_cpu(hp_elog->_drc_u.drc_count); > > Didn't you already do the endian conversion back in > > handle_dlpar_errorlog? > >> + pr_info("Attempting to hot-remove %d LMB(s)\n", lmbs_to_remove); > >> + > >> + if (lmbs_to_remove == 0) > >> + return -EINVAL; > >> + > >> + p = prop->value; > >> + num_lmbs = be32_to_cpu(*p++); > >> + lmbs = (struct of_drconf_cell *)p; > >> + > >> + /* Validate that there are enough LMBs to satisfy the request */ > >> + for (i = 0; i < num_lmbs; i++) { > >> + if (be32_to_cpu(lmbs[i].flags) & DRCONF_MEM_ASSIGNED) > >> + lmbs_available++; > >> + } > >> + > >> + if (lmbs_available < lmbs_to_remove) > >> + return -EINVAL; > >> + > >> + for (i = 0; i < num_lmbs; i++) { > >> + if (lmbs_to_remove == lmbs_removed) > >> + break; > >> + > >> + rc = dlpar_remove_lmb(&lmbs[i]); > >> + if (rc) > >> + continue; > >> + > >> + lmbs_removed++; > >> + > >> + /* Mark this lmb so we can add it later if all of the > >> + * requested LMBs cannot be removed. > >> + */ > >> + lmbs[i].reserved = 1; > >> + } > >> + > >> + if (lmbs_removed != lmbs_to_remove) { > >> + pr_err("Memory hot-remove failed, adding LMB's back\n"); > >> + > >> + for (i = 0; i < num_lmbs; i++) { > >> + if (!lmbs[i].reserved) > >> + continue; > >> + > >> + rc = dlpar_add_lmb(&lmbs[i]); > >> + if (rc) > > If this happens you this will leave an LMB removed but an error code > > from this function will cause dlpar_memory to discard the dn prop that > > it passed in. If you couldn't roll back completely the caller should > > still update the dn property with the ones that it has left > > removed/failed to re-add. Perhaps this function needs a mechanism to > > report success/failure (as it currently does) and a mechanism to say > > whether or not the dn property is dirty or not. > > Good catch. The device tree should be updated to denote lmb's that were > added but we failed to remove because of some error. > > >> + pr_err("Failed to add LMB back, drc index %x\n", > >> + be32_to_cpu(lmbs[i].drc_index)); > >> + > > On a clean (everything rolled back successfully) failure, I don't think > > you _need_ to bother although this is nice cleanup, you didn't do it > > dlpar_memory_add_by_count - I'm in favour of being clean especially > > since on an 'unclean' failure you should clear that flag - if you're > > going to still update the device tree. > > I'll re-visit the cleanup code to make sure the device tree property is left > in the correct state on a failure during roll-back. > > >> + lmbs[i].reserved = 0; > >> + } > >> + rc = -EINVAL; > >> + } else { > >> + /* remove any reserved markings */ > >> + for (i = 0; i < num_lmbs; i++) > >> + lmbs[i].reserved = 0; > >> + } > > Hmmmm since the if statements are checking for failures. It might be > > more clear to return -EINVAL (rather than setting rc) and the code in > > the else block can be moved out of it and come to think of it, return 0. > > Otherwise it looks odd to me with the 'normal' success case code being > > in an else statement. > > I like this. > > >> + > >> + return rc; > >> +} > >> + > >> +static int dlpar_memory_remove_by_index(struct pseries_hp_errorlog > >> *hp_elog, > >> + struct property *prop) > >> +{ > >> + struct of_drconf_cell *lmbs; > >> + uint32_t num_lmbs, drc_index; > >> + int lmb_found; > >> + __be32 *p; > >> + int i, rc; > >> + > >> + drc_index = be32_to_cpu(hp_elog->_drc_u.drc_index); > > Didn't you already do the endian conversion back in > > handle_dlpar_errorlog? > >> + pr_info("Attempting to hot-remove LMB, drc index %x\n", drc_index); > >> + > >> + p = prop->value; > >> + num_lmbs = be32_to_cpu(*p++); > >> + lmbs = (struct of_drconf_cell *)p; > >> + > >> + lmb_found = 0; > >> + for (i = 0; i < num_lmbs; i++) { > >> + if (lmbs[i].drc_index == hp_elog->_drc_u.drc_index) { > >> + lmb_found = 1; > >> + rc = dlpar_remove_lmb(&lmbs[i]); > >> + break; > >> + } > >> + } > >> + > >> + if (!lmb_found) > >> + rc = -EINVAL; > >> + > >> + if (rc) > >> + pr_info("Failed to hot-remove memory, drc index %x\n", > >> + drc_index); > >> + > >> + return rc; > >> +} > >> + > >> #else > >> static inline int pseries_remove_memblock(unsigned long base, > >> unsigned int memblock_size) > >> @@ -183,6 +356,11 @@ static inline int pseries_remove_mem_node(struct > >> device_node *np) > >> { > >> return 0; > >> } > >> +static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog) > >> +{ > >> + return -EOPNOTSUPP; > >> +} > >> + > >> #endif /* CONFIG_MEMORY_HOTREMOVE */ > >> > >> static int dlpar_add_lmb(struct of_drconf_cell *lmb) > >> @@ -298,14 +476,24 @@ static int dlpar_memory_add_by_count(struct > >> pseries_hp_errorlog *hp_elog, > >> } > >> > >> if (lmbs_added != lmbs_to_add) { > >> - /* TODO: remove added lmbs */ > >> + pr_err("Memory hot-add failed, removing any added LMBs\n"); > >> + > >> + for (i = 0; i < num_lmbs; i++) { > >> + if (!lmbs[i].reserved) > >> + continue; > >> + > >> + rc = dlpar_remove_lmb(&lmbs[i]); > >> + if (rc) > > There is a very much related comment in dlpar_memory_remove_by_count > > about this condition being true. > >> + pr_err("Failed to remove LMB, drc index %x\n", > >> + be32_to_cpu(lmbs[i].drc_index)); > > > > You don't clear the reserved flag here? > In the failure case here the device tree property is not updated, so no > cleanup needed. This is not really obvious so it would probably be best > to do the cleanup. > Of course except when you fail to roll back and you'll be forced to update the device tree - but I suppose clearing it here or not really depends on how you do that... > > > >> + } > >> rc = -EINVAL; > > Same observation as in dlpar_memory_remove_by_count > > ok. > > Thanks for the review. > > -Nathan > > >> + } else { > >> + /* Clear the reserved fields */ > >> + for (i = 0; i < num_lmbs; i++) > >> + lmbs[i].reserved = 0; > >> } > >> > >> - /* Clear the reserved fields */ > >> - for (i = 0; i < num_lmbs; i++) > >> - lmbs[i].reserved = 0; > >> - > >> return rc; > >> } > >> > >> @@ -373,6 +561,14 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) > >> else > >> rc = -EINVAL; > >> break; > >> + case PSERIES_HP_ELOG_ACTION_REMOVE: > >> + if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) > >> + rc = dlpar_memory_remove_by_count(hp_elog, prop); > >> + else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) > >> + rc = dlpar_memory_remove_by_index(hp_elog, prop); > >> + else > >> + rc = -EINVAL; > >> + break; > >> default: > >> pr_err("Invalid action (%d) specified\n", hp_elog->action); > >> rc = -EINVAL; > >> > >> _______________________________________________ > >> Linuxppc-dev mailing list > >> Linuxppc-dev@lists.ozlabs.org > >> https://lists.ozlabs.org/listinfo/linuxppc-dev > > > > > > > > _______________________________________________ > Linuxppc-dev mailing list > Linuxppc-dev@lists.ozlabs.org > https://lists.ozlabs.org/listinfo/linuxppc-dev
_______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev