On Mon, 2014-11-24 at 09:03 -0600, Nathan Fontenot wrote:
> 
> On 11/21/2014 01:49 AM, Cyril Bur wrote:
> > 
> > On Mon, 2014-11-17 at 15:56 -0600, Nathan Fontenot wrote:
> >> Move handling of memory hotplug remove on pseries completely into the 
> >> kernel.
> >>
> >> The current memory hotplug remove path involves the drmgr command doing 
> >> part
> >> of this work in userspace and requesting the kernel to do additional 
> >> pieces.
> >> This patch allows us to handle the act completely in the kernel via rtas
> >> hotplug events. This allows us to perform the operation faster and provide
> >> a common memory hotplug remove path for PowerVM and PowerKVM systems.
> >>
> >> Signed-off-by: Nathan Fontenot <nf...@linux.vnet.ibm.com>
> >> ---
> >>  arch/powerpc/platforms/pseries/hotplug-memory.c |  206 
> >> ++++++++++++++++++++++-
> >>  1 file changed, 201 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> >> b/arch/powerpc/platforms/pseries/hotplug-memory.c
> >> index b57d42b..c8189e8 100644
> >> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> >> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> >> @@ -173,6 +173,179 @@ static int pseries_remove_mem_node(struct 
> >> device_node *np)
> >>    pseries_remove_memblock(base, lmb_size);
> >>    return 0;
> >>  }
> >> +
> >> +static int lmb_is_removable(struct of_drconf_cell *lmb)
> >> +{
> >> +  int i, scns_per_block;
> >> +  int rc = 1;
> >> +  unsigned long pfn, block_sz;
> >> +  u64 phys_addr;
> >> +
> >> +  if (!(be32_to_cpu(lmb->flags) & DRCONF_MEM_ASSIGNED))
> >> +          return -1;
> > This makes me kind of nervous. You're using the return value of
> > lmb_is_removable as a boolean but it returns three possible values -1,0
> > and 1. Functionally it looks correct to me so its not a massive issue
> 
> Oh yuck. this should be a boolean return. I'll fix that.
> 
> >> +
> >> +  phys_addr = be64_to_cpu(lmb->base_addr);
> >> +  block_sz = memory_block_size_bytes();
> >> +  scns_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
> >> +
> >> +  for (i = 0; i < scns_per_block; i++) {
> >> +          pfn = PFN_DOWN(phys_addr);
> >> +          if (!pfn_present(pfn))
> >> +                  continue;
> >> +
> >> +          rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
> >> +          phys_addr += MIN_MEMORY_BLOCK_SIZE;
> >> +  }
> >> +
> >> +  return rc;
> >> +}
> >> +
> >> +static int dlpar_add_lmb(struct of_drconf_cell *);
> >> +
> >> +static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
> >> +{
> >> +  struct memory_block *mem_block;
> >> +  unsigned long block_sz;
> >> +  u64 phys_addr;
> >> +  uint32_t drc_index;
> >> +  int nid, rc;
> >> +
> >> +  if (!lmb_is_removable(lmb))
> >> +          return -EINVAL;
> >> +
> >> +  phys_addr = be64_to_cpu(lmb->base_addr);
> >> +  drc_index = be32_to_cpu(lmb->drc_index);
> >> +
> >> +  mem_block = lmb_to_memblock(lmb);
> >> +  if (!mem_block)
> >> +          return -EINVAL;
> >> +
> >> +  rc = device_offline(&mem_block->dev);
> >> +  put_device(&mem_block->dev);
> >> +  if (rc)
> >> +          return rc;
> >> +
> >> +  block_sz = pseries_memory_block_size();
> >> +  nid = memory_add_physaddr_to_nid(phys_addr);
> >> +
> >> +  remove_memory(nid, phys_addr, block_sz);
> >> +
> >> +  /* Update memory regions for memory remove */
> >> +  memblock_remove(phys_addr, block_sz);
> >> +
> >> +  dlpar_release_drc(drc_index);
> >> +
> >> +  lmb->flags &= cpu_to_be32(~DRCONF_MEM_ASSIGNED);
> >> +  pr_info("Memory at %llx (drc index %x) has been hot-removed\n",
> >> +          be64_to_cpu(lmb->base_addr), drc_index);
> > dlpar_add_lmb doesn't print anything but dlpar_remove_lmb does? Related
> > to my comment about printing a 'hot-add' messages prematurely, perhaps
> > move this to the callers
> 
> Yes, I'll update the remove mem messages also.
> 
> >> +
> >> +  return 0;
> >> +}
> >> +
> >> +static int dlpar_memory_remove_by_count(struct pseries_hp_errorlog 
> >> *hp_elog,
> >> +                                  struct property *prop)
> >> +{
> >> +  struct of_drconf_cell *lmbs;
> >> +  int lmbs_to_remove, lmbs_removed = 0;
> >> +  int lmbs_available = 0;
> >> +  uint32_t num_lmbs;
> >> +  __be32 *p;
> >> +  int i, rc;
> >> +
> >> +  lmbs_to_remove = be32_to_cpu(hp_elog->_drc_u.drc_count);
> > Didn't you already do the endian conversion back in
> > handle_dlpar_errorlog?
> >> +  pr_info("Attempting to hot-remove %d LMB(s)\n", lmbs_to_remove);
> >> +
> >> +  if (lmbs_to_remove == 0)
> >> +          return -EINVAL;
> >> +
> >> +  p = prop->value;
> >> +  num_lmbs = be32_to_cpu(*p++);
> >> +  lmbs = (struct of_drconf_cell *)p;
> >> +
> >> +  /* Validate that there are enough LMBs to satisfy the request */
> >> +  for (i = 0; i < num_lmbs; i++) {
> >> +          if (be32_to_cpu(lmbs[i].flags) & DRCONF_MEM_ASSIGNED)
> >> +                  lmbs_available++;
> >> +  }
> >> +
> >> +  if (lmbs_available < lmbs_to_remove)
> >> +          return -EINVAL;
> >> +
> >> +  for (i = 0; i < num_lmbs; i++) {
> >> +          if (lmbs_to_remove == lmbs_removed)
> >> +                  break;
> >> +
> >> +          rc = dlpar_remove_lmb(&lmbs[i]);
> >> +          if (rc)
> >> +                  continue;
> >> +
> >> +          lmbs_removed++;
> >> +
> >> +          /* Mark this lmb so we can add it later if all of the
> >> +           * requested LMBs cannot be removed.
> >> +           */
> >> +          lmbs[i].reserved = 1;
> >> +  }
> >> +
> >> +  if (lmbs_removed != lmbs_to_remove) {
> >> +          pr_err("Memory hot-remove failed, adding LMB's back\n");
> >> +
> >> +          for (i = 0; i < num_lmbs; i++) {
> >> +                  if (!lmbs[i].reserved)
> >> +                          continue;
> >> +
> >> +                  rc = dlpar_add_lmb(&lmbs[i]);
> >> +                  if (rc)
> > If this happens you this will leave an LMB removed but an error code
> > from this function will cause dlpar_memory to discard the dn prop that
> > it passed in. If you couldn't roll back completely the caller should
> > still update the dn property with the ones that it has left
> > removed/failed to re-add. Perhaps this function needs a mechanism to
> > report success/failure (as it currently does) and a mechanism to say
> > whether or not the dn property is dirty or not.
> 
> Good catch. The device tree should be updated to denote lmb's that were
> added but we failed to remove because of some error.
> 
> >> +                          pr_err("Failed to add LMB back, drc index %x\n",
> >> +                                 be32_to_cpu(lmbs[i].drc_index));
> >> +
> > On a clean (everything rolled back successfully) failure, I don't think
> > you _need_ to bother although this is nice cleanup, you didn't do it
> > dlpar_memory_add_by_count - I'm in favour of being clean especially
> > since on an 'unclean' failure you should clear that flag - if you're
> > going to still update the device tree.
> 
> I'll re-visit the cleanup code to make sure the device tree property is left
> in the correct state on a failure during roll-back.
> 
> >> +                  lmbs[i].reserved = 0;
> >> +          }
> >> +          rc = -EINVAL;
> >> +  } else {
> >> +          /* remove any reserved markings */
> >> +          for (i = 0; i < num_lmbs; i++)
> >> +                  lmbs[i].reserved = 0;
> >> +  }
> > Hmmmm since the if statements are checking for failures. It might be
> > more clear to return -EINVAL (rather than setting rc) and the code in
> > the else block can be moved out of it and come to think of it, return 0.
> > Otherwise it looks odd to me with the 'normal' success case code being
> > in an else statement.
> 
> I like this.
> 
> >> +
> >> +  return rc;
> >> +}
> >> +
> >> +static int dlpar_memory_remove_by_index(struct pseries_hp_errorlog 
> >> *hp_elog,
> >> +                                  struct property *prop)
> >> +{
> >> +  struct of_drconf_cell *lmbs;
> >> +  uint32_t num_lmbs, drc_index;
> >> +  int lmb_found;
> >> +  __be32 *p;
> >> +  int i, rc;
> >> +
> >> +  drc_index = be32_to_cpu(hp_elog->_drc_u.drc_index);
> > Didn't you already do the endian conversion back in
> > handle_dlpar_errorlog?
> >> +  pr_info("Attempting to hot-remove LMB, drc index %x\n", drc_index);
> >> +
> >> +  p = prop->value;
> >> +  num_lmbs = be32_to_cpu(*p++);
> >> +  lmbs = (struct of_drconf_cell *)p;
> >> +
> >> +  lmb_found = 0;
> >> +  for (i = 0; i < num_lmbs; i++) {
> >> +          if (lmbs[i].drc_index == hp_elog->_drc_u.drc_index) {
> >> +                  lmb_found = 1;
> >> +                  rc = dlpar_remove_lmb(&lmbs[i]);
> >> +                  break;
> >> +          }
> >> +  }
> >> +
> >> +  if (!lmb_found)
> >> +          rc = -EINVAL;
> >> +
> >> +  if (rc)
> >> +          pr_info("Failed to hot-remove memory, drc index %x\n",
> >> +                  drc_index);
> >> +
> >> +  return rc;
> >> +}
> >> +
> >>  #else
> >>  static inline int pseries_remove_memblock(unsigned long base,
> >>                                      unsigned int memblock_size)
> >> @@ -183,6 +356,11 @@ static inline int pseries_remove_mem_node(struct 
> >> device_node *np)
> >>  {
> >>    return 0;
> >>  }
> >> +static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog)
> >> +{
> >> +  return -EOPNOTSUPP;
> >> +}
> >> +
> >>  #endif /* CONFIG_MEMORY_HOTREMOVE */
> >>  
> >>  static int dlpar_add_lmb(struct of_drconf_cell *lmb)
> >> @@ -298,14 +476,24 @@ static int dlpar_memory_add_by_count(struct 
> >> pseries_hp_errorlog *hp_elog,
> >>    }
> >>  
> >>    if (lmbs_added != lmbs_to_add) {
> >> -          /* TODO: remove added lmbs */
> >> +          pr_err("Memory hot-add failed, removing any added LMBs\n");
> >> +
> >> +          for (i = 0; i < num_lmbs; i++) {
> >> +                  if (!lmbs[i].reserved)
> >> +                          continue;
> >> +
> >> +                  rc = dlpar_remove_lmb(&lmbs[i]);
> >> +                  if (rc)
> > There is a very much related comment in dlpar_memory_remove_by_count
> > about this condition being true.
> >> +                          pr_err("Failed to remove LMB, drc index %x\n",
> >> +                                 be32_to_cpu(lmbs[i].drc_index));
> > 
> > You don't clear the reserved flag here?
> In the failure case here the device tree property is not updated, so no
> cleanup needed. This is not really obvious so it would probably be best
> to do the cleanup.
>  
Of course except when you fail to roll back and you'll be forced to
update the device tree - but I suppose clearing it here or not really
depends on how you do that...
> > 
> >> +          }
> >>            rc = -EINVAL;
> > Same observation as in dlpar_memory_remove_by_count
> 
> ok.
> 
> Thanks for the review.
> 
> -Nathan
> 
> >> +  } else {
> >> +          /* Clear the reserved fields */
> >> +          for (i = 0; i < num_lmbs; i++)
> >> +                  lmbs[i].reserved = 0;
> >>    }
> >>  
> >> -  /* Clear the reserved fields */
> >> -  for (i = 0; i < num_lmbs; i++)
> >> -          lmbs[i].reserved = 0;
> >> -
> >>    return rc;
> >>  }
> >>  
> >> @@ -373,6 +561,14 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
> >>            else
> >>                    rc = -EINVAL;
> >>            break;
> >> +  case PSERIES_HP_ELOG_ACTION_REMOVE:
> >> +          if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
> >> +                  rc = dlpar_memory_remove_by_count(hp_elog, prop);
> >> +          else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
> >> +                  rc = dlpar_memory_remove_by_index(hp_elog, prop);
> >> +          else
> >> +                  rc = -EINVAL;
> >> +          break;
> >>    default:
> >>            pr_err("Invalid action (%d) specified\n", hp_elog->action);
> >>            rc = -EINVAL;
> >>
> >> _______________________________________________
> >> Linuxppc-dev mailing list
> >> Linuxppc-dev@lists.ozlabs.org
> >> https://lists.ozlabs.org/listinfo/linuxppc-dev
> > 
> > 
> > 
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev


_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to