Re: [PATCH v1 2/4] mm/memory_hotplug: remove nid parameter from arch_remove_memory()

2021-07-14 Thread Pankaj Gupta
ndex 74b78840182d..bd90b8fe81e4 100644
> --- a/arch/x86/mm/init_32.c
> +++ b/arch/x86/mm/init_32.c
> @@ -801,8 +801,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
> return __add_pages(nid, start_pfn, nr_pages, params);
>  }
>
> -void arch_remove_memory(int nid, u64 start, u64 size,
> -   struct vmem_altmap *altmap)
> +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
>  {
> unsigned long start_pfn = start >> PAGE_SHIFT;
> unsigned long nr_pages = size >> PAGE_SHIFT;
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index ddeaba947eb3..a6e11763763f 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -1255,8 +1255,7 @@ kernel_physical_mapping_remove(unsigned long start, 
> unsigned long end)
> remove_pagetable(start, end, true, NULL);
>  }
>
> -void __ref arch_remove_memory(int nid, u64 start, u64 size,
> - struct vmem_altmap *altmap)
> +void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap 
> *altmap)
>  {
> unsigned long start_pfn = start >> PAGE_SHIFT;
> unsigned long nr_pages = size >> PAGE_SHIFT;
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index d01b504ce06f..010a192298b5 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -130,8 +130,7 @@ static inline bool movable_node_is_enabled(void)
> return movable_node_enabled;
>  }
>
> -extern void arch_remove_memory(int nid, u64 start, u64 size,
> -  struct vmem_altmap *altmap);
> +extern void arch_remove_memory(u64 start, u64 size, struct vmem_altmap 
> *altmap);
>  extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages,
>struct vmem_altmap *altmap);
>
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 93b3abaf9828..f2a9af3af184 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1106,7 +1106,7 @@ int __ref add_memory_resource(int nid, struct resource 
> *res, mhp_t mhp_flags)
> /* create memory block devices after memory was added */
> ret = create_memory_block_devices(start, size, mhp_altmap.alloc);
> if (ret) {
> -   arch_remove_memory(nid, start, size, NULL);
> +   arch_remove_memory(start, size, NULL);
> goto error;
> }
>
> @@ -1892,7 +1892,7 @@ static int __ref try_remove_memory(int nid, u64 start, 
> u64 size)
>
> mem_hotplug_begin();
>
> -   arch_remove_memory(nid, start, size, altmap);
> +   arch_remove_memory(start, size, altmap);
>
> if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) {
> memblock_free(start, size);
> diff --git a/mm/memremap.c b/mm/memremap.c
> index 15a074ffb8d7..ed593bf87109 100644
> --- a/mm/memremap.c
> +++ b/mm/memremap.c
> @@ -140,14 +140,11 @@ static void pageunmap_range(struct dev_pagemap *pgmap, 
> int range_id)
>  {
> struct range *range = &pgmap->ranges[range_id];
> struct page *first_page;
> -   int nid;
>
> /* make sure to access a memmap that was actually initialized */
> first_page = pfn_to_page(pfn_first(pgmap, range_id));
>
> /* pages are dead and unused, undo the arch mapping */
> -   nid = page_to_nid(first_page);
> -
> mem_hotplug_begin();
> remove_pfn_range_from_zone(page_zone(first_page), 
> PHYS_PFN(range->start),
>PHYS_PFN(range_len(range)));
> @@ -155,7 +152,7 @@ static void pageunmap_range(struct dev_pagemap *pgmap, 
> int range_id)
> __remove_pages(PHYS_PFN(range->start),
>PHYS_PFN(range_len(range)), NULL);
> } else {
> -   arch_remove_memory(nid, range->start, range_len(range),
> +   arch_remove_memory(range->start, range_len(range),
> pgmap_altmap(pgmap));
> kasan_remove_zero_shadow(__va(range->start), 
> range_len(range));
> }

Reviewed-by: Pankaj Gupta 


Re: [PATCH] mm/pmem: Avoid inserting hugepage PTE entry with fsdax if hugepage support is disabled

2021-02-09 Thread Pankaj Gupta
> Differentiate between hardware not supporting hugepages and user disabling THP
> via 'echo never > /sys/kernel/mm/transparent_hugepage/enabled'
>
> For the devdax namespace, the kernel handles the above via the
> supported_alignment attribute and failing to initialize the namespace
> if the namespace align value is not supported on the platform.
>
> For the fsdax namespace, the kernel will continue to initialize
> the namespace. This can result in the kernel creating a huge pte
> entry even though the hardware don't support the same.
>
> We do want hugepage support with pmem even if the end-user disabled THP
> via sysfs file (/sys/kernel/mm/transparent_hugepage/enabled). Hence
> differentiate between hardware/firmware lacking support vs user-controlled
> disable of THP and prevent a huge fault if the hardware lacks hugepage
> support.
>
> Signed-off-by: Aneesh Kumar K.V 
> ---
>  include/linux/huge_mm.h | 15 +--
>  mm/huge_memory.c|  6 +-
>  2 files changed, 14 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 6a19f35f836b..ba973efcd369 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -78,6 +78,7 @@ static inline vm_fault_t vmf_insert_pfn_pud(struct vm_fault 
> *vmf, pfn_t pfn,
>  }
>
>  enum transparent_hugepage_flag {
> +   TRANSPARENT_HUGEPAGE_NEVER_DAX,
> TRANSPARENT_HUGEPAGE_FLAG,
> TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
> TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
> @@ -123,6 +124,13 @@ extern unsigned long transparent_hugepage_flags;
>   */
>  static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
>  {
> +
> +   /*
> +* If the hardware/firmware marked hugepage support disabled.
> +*/
> +   if (transparent_hugepage_flags & (1 << 
> TRANSPARENT_HUGEPAGE_NEVER_DAX))
> +   return false;
> +
> if (vma->vm_flags & VM_NOHUGEPAGE)
> return false;
>
> @@ -134,12 +142,7 @@ static inline bool __transparent_hugepage_enabled(struct 
> vm_area_struct *vma)
>
> if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
> return true;
> -   /*
> -* For dax vmas, try to always use hugepage mappings. If the kernel 
> does
> -* not support hugepages, fsdax mappings will fallback to PAGE_SIZE
> -* mappings, and device-dax namespaces, that try to guarantee a given
> -* mapping size, will fail to enable
> -*/
> +
> if (vma_is_dax(vma))
> return true;
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 9237976abe72..d698b7e27447 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -386,7 +386,11 @@ static int __init hugepage_init(void)
> struct kobject *hugepage_kobj;
>
> if (!has_transparent_hugepage()) {
> -   transparent_hugepage_flags = 0;
> +   /*
> +        * Hardware doesn't support hugepages, hence disable
> +* DAX PMD support.
> +*/
> +   transparent_hugepage_flags = 1 << 
> TRANSPARENT_HUGEPAGE_NEVER_DAX;
> return -EINVAL;
> }

 Reviewed-by: Pankaj Gupta 


Re: [RFC PATCH] powerpc/papr_scm: Implement scm async flush

2020-12-01 Thread Pankaj Gupta
> >> Tha patch implements SCM async-flush hcall and sets the
> >> ND_REGION_ASYNC capability when the platform device tree
> >> has "ibm,async-flush-required" set.
> >
> > So, you are reusing the existing ND_REGION_ASYNC flag for the
> > hypercall based async flush with device tree discovery?
> >
> > Out of curiosity, does virtio based flush work in ppc? Was just thinking
> > if we can reuse virtio based flush present in virtio-pmem? Or anything
> > else we are trying to achieve here?
> >
>
>
> Not with PAPR based pmem driver papr_scm.ko. The devices there are
> considered platform device and we use hypercalls to configure the
> device. On similar fashion we are now using hypercall to flush the host
> based caches.

o.k. Thanks for answering.

Best regards,
Pankaj

>
> -aneesh


Re: [RFC PATCH] powerpc/papr_scm: Implement scm async flush

2020-12-01 Thread Pankaj Gupta
> Tha patch implements SCM async-flush hcall and sets the
> ND_REGION_ASYNC capability when the platform device tree
> has "ibm,async-flush-required" set.

So, you are reusing the existing ND_REGION_ASYNC flag for the
hypercall based async flush with device tree discovery?

Out of curiosity, does virtio based flush work in ppc? Was just thinking
if we can reuse virtio based flush present in virtio-pmem? Or anything
else we are trying to achieve here?

Thanks,
Pankaj
>
> The below demonstration shows the map_sync behavior when
> ibm,async-flush-required is present in device tree.
> (https://github.com/avocado-framework-tests/avocado-misc-tests/blob/master/memory/ndctl.py.data/map_sync.c)
>
> The pmem0 is from nvdimm without async-flush-required,
> and pmem1 is from nvdimm with async-flush-required, mounted as
> /dev/pmem0 on /mnt1 type xfs 
> (rw,relatime,attr2,dax=always,inode64,logbufs=8,logbsize=32k,noquota)
> /dev/pmem1 on /mnt2 type xfs 
> (rw,relatime,attr2,dax=always,inode64,logbufs=8,logbsize=32k,noquota)
>
> #./mapsync /mnt1/newfile> Without async-flush-required
> #./mapsync /mnt2/newfile> With async-flush-required
> Failed to mmap  with Operation not supported
>
> Signed-off-by: Shivaprasad G Bhat 
> ---
> The HCALL semantics are in review, not final.

Any link of the discussion?

>
>  Documentation/powerpc/papr_hcalls.rst |   14 ++
>  arch/powerpc/include/asm/hvcall.h |3 +-
>  arch/powerpc/platforms/pseries/papr_scm.c |   39 
> +
>  3 files changed, 55 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/powerpc/papr_hcalls.rst 
> b/Documentation/powerpc/papr_hcalls.rst
> index 48fcf1255a33..cc310814f24c 100644
> --- a/Documentation/powerpc/papr_hcalls.rst
> +++ b/Documentation/powerpc/papr_hcalls.rst
> @@ -275,6 +275,20 @@ Health Bitmap Flags:
>  Given a DRC Index collect the performance statistics for NVDIMM and copy them
>  to the resultBuffer.
>
> +**H_SCM_ASYNC_FLUSH**
> +
> +| Input: *drcIndex*
> +| Out: *continue-token*
> +| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY*
> +
> +Given a DRC Index Flush the data to backend NVDIMM device.
> +
> +The hcall returns H_BUSY when the flush takes longer time and the hcall needs
> +to be issued multiple times in order to be completely serviced. The
> +*continue-token* from the output to be passed in the argument list in
> +subsequent hcalls to the hypervisor until the hcall is completely serviced
> +at which point H_SUCCESS is returned by the hypervisor.
> +
>  References
>  ==
>  .. [1] "Power Architecture Platform Reference"
> diff --git a/arch/powerpc/include/asm/hvcall.h 
> b/arch/powerpc/include/asm/hvcall.h
> index c1fbccb04390..4a13074bc782 100644
> --- a/arch/powerpc/include/asm/hvcall.h
> +++ b/arch/powerpc/include/asm/hvcall.h
> @@ -306,7 +306,8 @@
>  #define H_SCM_HEALTH0x400
>  #define H_SCM_PERFORMANCE_STATS 0x418
>  #define H_RPT_INVALIDATE   0x448
> -#define MAX_HCALL_OPCODE   H_RPT_INVALIDATE
> +#define H_SCM_ASYNC_FLUSH  0x4A0
> +#define MAX_HCALL_OPCODE   H_SCM_ASYNC_FLUSH
>
>  /* Scope args for H_SCM_UNBIND_ALL */
>  #define H_UNBIND_SCOPE_ALL (0x1)
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c 
> b/arch/powerpc/platforms/pseries/papr_scm.c
> index 835163f54244..1f8c5153cb3d 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -93,6 +93,7 @@ struct papr_scm_priv {
> uint64_t block_size;
> int metadata_size;
> bool is_volatile;
> +   bool async_flush_required;
>
> uint64_t bound_addr;
>
> @@ -117,6 +118,38 @@ struct papr_scm_priv {
> size_t stat_buffer_len;
>  };
>
> +static int papr_scm_pmem_flush(struct nd_region *nd_region, struct bio *bio)
> +{
> +   unsigned long ret[PLPAR_HCALL_BUFSIZE];
> +   struct papr_scm_priv *p = nd_region_provider_data(nd_region);
> +   int64_t rc;
> +   uint64_t token = 0;
> +
> +   do {
> +   rc = plpar_hcall(H_SCM_ASYNC_FLUSH, ret, p->drc_index, token);
> +
> +   /* Check if we are stalled for some time */
> +   token = ret[0];
> +   if (H_IS_LONG_BUSY(rc)) {
> +   msleep(get_longbusy_msecs(rc));
> +   rc = H_BUSY;
> +   } else if (rc == H_BUSY) {
> +   cond_resched();
> +   }
> +
> +   } while (rc == H_BUSY);
> +
> +   if (rc)
> +   dev_err(&p->pdev->dev, "flush error: %lld\n", rc);
> +   else
> +   dev_dbg(&p->pdev->dev, "flush drc 0x%x complete\n",
> +   p->drc_index);
> +
> +   dev_dbg(&p->pdev->dev, "Flush call complete\n");
> +
> +   return rc;
> +}
> +
>  static LIST_HEAD(papr_nd_regions);
>  static DEFINE_MUTEX(papr_ndr_lock);
>
> @@ -943,6 +976,11 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
> ndr_desc.num_mappings = 1;
>

Re: [PATCH v3 3/7] mm/memory_hotplug: prepare passing flags to add_memory() and friends

2020-09-10 Thread Pankaj Gupta
> We soon want to pass flags, e.g., to mark added System RAM resources.
> mergeable. Prepare for that.
>
> This patch is based on a similar patch by Oscar Salvador:
>
> https://lkml.kernel.org/r/20190625075227.15193-3-osalva...@suse.de
>
> Acked-by: Wei Liu 
> Reviewed-by: Juergen Gross  # Xen related part
> Cc: Andrew Morton 
> Cc: Michal Hocko 
> Cc: Dan Williams 
> Cc: Jason Gunthorpe 
> Cc: Pankaj Gupta 
> Cc: Baoquan He 
> Cc: Wei Yang 
> Cc: Michael Ellerman 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: "Rafael J. Wysocki" 
> Cc: Len Brown 
> Cc: Greg Kroah-Hartman 
> Cc: Vishal Verma 
> Cc: Dave Jiang 
> Cc: "K. Y. Srinivasan" 
> Cc: Haiyang Zhang 
> Cc: Stephen Hemminger 
> Cc: Wei Liu 
> Cc: Heiko Carstens 
> Cc: Vasily Gorbik 
> Cc: Christian Borntraeger 
> Cc: David Hildenbrand 
> Cc: "Michael S. Tsirkin" 
> Cc: Jason Wang 
> Cc: Boris Ostrovsky 
> Cc: Juergen Gross 
> Cc: Stefano Stabellini 
> Cc: "Oliver O'Halloran" 
> Cc: Pingfan Liu 
> Cc: Nathan Lynch 
> Cc: Libor Pechacek 
> Cc: Anton Blanchard 
> Cc: Leonardo Bras 
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-a...@vger.kernel.org
> Cc: linux-nvd...@lists.01.org
> Cc: linux-hyp...@vger.kernel.org
> Cc: linux-s...@vger.kernel.org
> Cc: virtualizat...@lists.linux-foundation.org
> Cc: xen-de...@lists.xenproject.org
> Signed-off-by: David Hildenbrand 
> ---
>  arch/powerpc/platforms/powernv/memtrace.c   |  2 +-
>  arch/powerpc/platforms/pseries/hotplug-memory.c |  2 +-
>  drivers/acpi/acpi_memhotplug.c  |  3 ++-
>  drivers/base/memory.c   |  3 ++-
>  drivers/dax/kmem.c  |  2 +-
>  drivers/hv/hv_balloon.c |  2 +-
>  drivers/s390/char/sclp_cmd.c|  2 +-
>  drivers/virtio/virtio_mem.c |  2 +-
>  drivers/xen/balloon.c   |  2 +-
>  include/linux/memory_hotplug.h  | 16 
>  mm/memory_hotplug.c | 14 +++---
>  11 files changed, 30 insertions(+), 20 deletions(-)
>
> diff --git a/arch/powerpc/platforms/powernv/memtrace.c 
> b/arch/powerpc/platforms/powernv/memtrace.c
> index 13b369d2cc454..6828108486f83 100644
> --- a/arch/powerpc/platforms/powernv/memtrace.c
> +++ b/arch/powerpc/platforms/powernv/memtrace.c
> @@ -224,7 +224,7 @@ static int memtrace_online(void)
> ent->mem = 0;
> }
>
> -   if (add_memory(ent->nid, ent->start, ent->size)) {
> +   if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) {
> pr_err("Failed to add trace memory to node %d\n",
> ent->nid);
> ret += 1;
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index 0ea976d1cac47..e1c9fa0d730f5 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -615,7 +615,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
> nid = memory_add_physaddr_to_nid(lmb->base_addr);
>
> /* Add the memory */
> -   rc = __add_memory(nid, lmb->base_addr, block_sz);
> +   rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_NONE);
> if (rc) {
> invalidate_lmb_associativity_index(lmb);
> return rc;
> diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
> index e294f44a78504..2067c3bc55763 100644
> --- a/drivers/acpi/acpi_memhotplug.c
> +++ b/drivers/acpi/acpi_memhotplug.c
> @@ -207,7 +207,8 @@ static int acpi_memory_enable_device(struct 
> acpi_memory_device *mem_device)
> if (node < 0)
> node = memory_add_physaddr_to_nid(info->start_addr);
>
> -   result = __add_memory(node, info->start_addr, info->length);
> +   result = __add_memory(node, info->start_addr, info->length,
> + MHP_NONE);
>
> /*
>  * If the memory block has been used by the kernel, 
> add_memory()
> diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> index 4db3c660de831..b4c297dd04755 100644
> --- a/drivers/base/memory.c
> +++ b/drivers/base/memory.c
> @@ -432,7 +432,8 @@ static ssize_t probe_store(struct device *dev, struct 
> device_attribute *attr,
>
> nid = memory_add_physaddr_to_nid(phys_addr);
> ret = __add_memory(nid, phys_a

Re: [PATCH v2] mm/sparse: Fix kernel crash with pfn_section_valid check

2020-03-26 Thread Pankaj Gupta
>
> Fixes the below crash
>
> BUG: Kernel NULL pointer dereference on read at 0x
> Faulting instruction address: 0xc0c3447c
> Oops: Kernel access of bad area, sig: 11 [#1]
> LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
> CPU: 11 PID: 7519 Comm: lt-ndctl Not tainted 5.6.0-rc7-autotest #1
> ...
> NIP [c0c3447c] vmemmap_populated+0x98/0xc0
> LR [c0088354] vmemmap_free+0x144/0x320
> Call Trace:
>  section_deactivate+0x220/0x240
>  __remove_pages+0x118/0x170
>  arch_remove_memory+0x3c/0x150
>  memunmap_pages+0x1cc/0x2f0
>  devm_action_release+0x30/0x50
>  release_nodes+0x2f8/0x3e0
>  device_release_driver_internal+0x168/0x270
>  unbind_store+0x130/0x170
>  drv_attr_store+0x44/0x60
>  sysfs_kf_write+0x68/0x80
>  kernfs_fop_write+0x100/0x290
>  __vfs_write+0x3c/0x70
>  vfs_write+0xcc/0x240
>  ksys_write+0x7c/0x140
>  system_call+0x5c/0x68
>
> The crash is due to NULL dereference at
>
> test_bit(idx, ms->usage->subsection_map); due to ms->usage = NULL; in 
> pfn_section_valid()
>
> With commit: d41e2f3bd546 ("mm/hotplug: fix hot remove failure in 
> SPARSEMEM|!VMEMMAP case")
> section_mem_map is set to NULL after depopulate_section_mem(). This
> was done so that pfn_page() can work correctly with kernel config that 
> disables
> SPARSEMEM_VMEMMAP. With that config pfn_to_page does
>
> __section_mem_map_addr(__sec) + __pfn;
> where
>
> static inline struct page *__section_mem_map_addr(struct mem_section *section)
> {
> unsigned long map = section->section_mem_map;
> map &= SECTION_MAP_MASK;
> return (struct page *)map;
> }
>
> Now with SPASEMEM_VMEMAP enabled, mem_section->usage->subsection_map is used 
> to
> check the pfn validity (pfn_valid()). Since section_deactivate release
> mem_section->usage if a section is fully deactivated, pfn_valid() check after
> a subsection_deactivate cause a kernel crash.
>
> static inline int pfn_valid(unsigned long pfn)
> {
> ...
> return early_section(ms) || pfn_section_valid(ms, pfn);
> }
>
> where
>
> static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
> {
> int idx = subsection_map_index(pfn);
>
> return test_bit(idx, ms->usage->subsection_map);
> }
>
> Avoid this by clearing SECTION_HAS_MEM_MAP when mem_section->usage is freed.
> For architectures like ppc64 where large pages are used for vmmemap mapping 
> (16MB),
> a specific vmemmap mapping can cover multiple sections. Hence before a vmemmap
> mapping page can be freed, the kernel needs to make sure there are no valid 
> sections
> within that mapping. Clearing the section valid bit before
> depopulate_section_memap enables this.
>
> Fixes: d41e2f3bd546 ("mm/hotplug: fix hot remove failure in 
> SPARSEMEM|!VMEMMAP case")
> Reported-by: Sachin Sant 
> Tested-by: Sachin Sant 
> Cc: Baoquan He 
> Cc: Michael Ellerman 
> Cc: Dan Williams 
> Cc: Pankaj Gupta 
> Cc: David Hildenbrand 
> Cc: Michal Hocko 
> Cc: Wei Yang 
> Cc: Oscar Salvador 
> Cc: Mike Rapoport 
> Cc: 
> Signed-off-by: Aneesh Kumar K.V 
> ---
>  mm/sparse.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/mm/sparse.c b/mm/sparse.c
> index aadb7298dcef..65599e8bd636 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -781,6 +781,12 @@ static void section_deactivate(unsigned long pfn, 
> unsigned long nr_pages,
> ms->usage = NULL;
> }
> memmap = sparse_decode_mem_map(ms->section_mem_map, 
> section_nr);
> +   /*
> +* Mark the section invalid so that valid_section()
> +* return false. This prevents code from dereferencing
> +* ms->usage array.
> +*/
> +   ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
> }
>
> if (section_is_early && memmap)
> --
Agree with Michal, comment for clearing the section would be nicer.

Fix looks good to me.
Acked-by: Pankaj Gupta 

> 2.25.1
>


Re: [PATCH v3 8/8] mm/memory_hotplug: allow to specify a default online_type

2020-03-19 Thread Pankaj Gupta
> For now, distributions implement advanced udev rules to essentially
> - Don't online any hotplugged memory (s390x)
> - Online all memory to ZONE_NORMAL (e.g., most virt environments like
>   hyperv)
> - Online all memory to ZONE_MOVABLE in case the zone imbalance is taken
>   care of (e.g., bare metal, special virt environments)
>
> In summary: All memory is usually onlined the same way, however, the
> kernel always has to ask user space to come up with the same answer.
> E.g., Hyper-V always waits for a memory block to get onlined before
> continuing, otherwise it might end up adding memory faster than
> onlining it, which can result in strange OOM situations. This waiting
> slows down adding of a bigger amount of memory.
>
> Let's allow to specify a default online_type, not just "online" and
> "offline". This allows distributions to configure the default online_type
> when booting up and be done with it.
>
> We can now specify "offline", "online", "online_movable" and
> "online_kernel" via
> - "memhp_default_state=" on the kernel cmdline
> - /sys/devices/system/memory/auto_online_blocks
> just like we are able to specify for a single memory block via
> /sys/devices/system/memory/memoryX/state
>
> Reviewed-by: Wei Yang 
> Acked-by: Michal Hocko 
> Cc: Greg Kroah-Hartman 
> Cc: Andrew Morton 
> Cc: Michal Hocko 
> Cc: Oscar Salvador 
> Cc: "Rafael J. Wysocki" 
> Cc: Baoquan He 
> Cc: Wei Yang 
> Signed-off-by: David Hildenbrand 
> ---
>  drivers/base/memory.c  | 11 +--
>  include/linux/memory_hotplug.h |  2 ++
>  mm/memory_hotplug.c|  8 
>  3 files changed, 11 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> index 8d3e16dab69f..2b09b68b9f78 100644
> --- a/drivers/base/memory.c
> +++ b/drivers/base/memory.c
> @@ -35,7 +35,7 @@ static const char *const online_type_to_str[] = {
> [MMOP_ONLINE_MOVABLE] = "online_movable",
>  };
>
> -static int memhp_online_type_from_str(const char *str)
> +int memhp_online_type_from_str(const char *str)
>  {
> int i;
>
> @@ -394,13 +394,12 @@ static ssize_t auto_online_blocks_store(struct device 
> *dev,
> struct device_attribute *attr,
> const char *buf, size_t count)
>  {
> -   if (sysfs_streq(buf, "online"))
> -   memhp_default_online_type = MMOP_ONLINE;
> -   else if (sysfs_streq(buf, "offline"))
> -   memhp_default_online_type = MMOP_OFFLINE;
> -   else
> +   const int online_type = memhp_online_type_from_str(buf);
> +
> +   if (online_type < 0)
> return -EINVAL;
>
> +   memhp_default_online_type = online_type;
> return count;
>  }
>
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 6d6f85bb66e9..93d9ada74ddd 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -118,6 +118,8 @@ extern int arch_add_memory(int nid, u64 start, u64 size,
>struct mhp_params *params);
>  extern u64 max_mem_size;
>
> +extern int memhp_online_type_from_str(const char *str);
> +
>  /* Default online_type (MMOP_*) when new memory blocks are added. */
>  extern int memhp_default_online_type;
>  /* If movable_node boot option specified */
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 4efcf8cb9ac5..89197163d138 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -74,10 +74,10 @@ int memhp_default_online_type = MMOP_ONLINE;
>
>  static int __init setup_memhp_default_state(char *str)
>  {
> -   if (!strcmp(str, "online"))
> -   memhp_default_online_type = MMOP_ONLINE;
> -   else if (!strcmp(str, "offline"))
> -   memhp_default_online_type = MMOP_OFFLINE;
> +   const int online_type = memhp_online_type_from_str(str);
> +
> +   if (online_type >= 0)
> +   memhp_default_online_type = online_type;
>
> return 1;
>  }
> --
Acked-by: Pankaj Gupta 

> 2.24.1
>
>


Re: [PATCH v3 7/8] mm/memory_hotplug: convert memhp_auto_online to store an online_type

2020-03-19 Thread Pankaj Gupta
> ... and rename it to memhp_default_online_type. This is a preparation
> for more detailed default online behavior.
>
> Reviewed-by: Wei Yang 
> Acked-by: Michal Hocko 
> Cc: Greg Kroah-Hartman 
> Cc: Andrew Morton 
> Cc: Michal Hocko 
> Cc: Oscar Salvador 
> Cc: "Rafael J. Wysocki" 
> Cc: Baoquan He 
> Cc: Wei Yang 
> Signed-off-by: David Hildenbrand 
> ---
>  drivers/base/memory.c  | 10 --
>  include/linux/memory_hotplug.h |  3 ++-
>  mm/memory_hotplug.c| 11 ++-
>  3 files changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> index 8a7f29c0bf97..8d3e16dab69f 100644
> --- a/drivers/base/memory.c
> +++ b/drivers/base/memory.c
> @@ -386,10 +386,8 @@ static DEVICE_ATTR_RO(block_size_bytes);
>  static ssize_t auto_online_blocks_show(struct device *dev,
>struct device_attribute *attr, char 
> *buf)
>  {
> -   if (memhp_auto_online)
> -   return sprintf(buf, "online\n");
> -   else
> -   return sprintf(buf, "offline\n");
> +   return sprintf(buf, "%s\n",
> +  online_type_to_str[memhp_default_online_type]);
>  }
>
>  static ssize_t auto_online_blocks_store(struct device *dev,
> @@ -397,9 +395,9 @@ static ssize_t auto_online_blocks_store(struct device 
> *dev,
> const char *buf, size_t count)
>  {
> if (sysfs_streq(buf, "online"))
> -   memhp_auto_online = true;
> +   memhp_default_online_type = MMOP_ONLINE;
> else if (sysfs_streq(buf, "offline"))
> -   memhp_auto_online = false;
> +   memhp_default_online_type = MMOP_OFFLINE;
> else
> return -EINVAL;
>
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 76f3c617a8ab..6d6f85bb66e9 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -118,7 +118,8 @@ extern int arch_add_memory(int nid, u64 start, u64 size,
>struct mhp_params *params);
>  extern u64 max_mem_size;
>
> -extern bool memhp_auto_online;
> +/* Default online_type (MMOP_*) when new memory blocks are added. */
> +extern int memhp_default_online_type;
>  /* If movable_node boot option specified */
>  extern bool movable_node_enabled;
>  static inline bool movable_node_is_enabled(void)
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index e21a7d53ade5..4efcf8cb9ac5 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -67,17 +67,17 @@ void put_online_mems(void)
>  bool movable_node_enabled = false;
>
>  #ifndef CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE
> -bool memhp_auto_online;
> +int memhp_default_online_type = MMOP_OFFLINE;
>  #else
> -bool memhp_auto_online = true;
> +int memhp_default_online_type = MMOP_ONLINE;
>  #endif
>
>  static int __init setup_memhp_default_state(char *str)
>  {
> if (!strcmp(str, "online"))
> -   memhp_auto_online = true;
> +   memhp_default_online_type = MMOP_ONLINE;
> else if (!strcmp(str, "offline"))
> -   memhp_auto_online = false;
> +   memhp_default_online_type = MMOP_OFFLINE;
>
> return 1;
>  }
> @@ -993,6 +993,7 @@ static int check_hotplug_memory_range(u64 start, u64 size)
>
>  static int online_memory_block(struct memory_block *mem, void *arg)
>  {
> +   mem->online_type = memhp_default_online_type;
> return device_online(&mem->dev);
>  }
>
> @@ -1065,7 +1066,7 @@ int __ref add_memory_resource(int nid, struct resource 
> *res)
> mem_hotplug_done();
>
> /* online pages if requested */
> -   if (memhp_auto_online)
> +   if (memhp_default_online_type != MMOP_OFFLINE)
> walk_memory_blocks(start, size, NULL, online_memory_block);
>
> return ret;
> --

Acked-by: Pankaj Gupta 

> 2.24.1
>
>


Re: [PATCH v3 6/8] mm/memory_hotplug: unexport memhp_auto_online

2020-03-19 Thread Pankaj Gupta
> All in-tree users except the mm-core are gone. Let's drop the export.
>
> Reviewed-by: Wei Yang 
> Acked-by: Michal Hocko 
> Cc: Andrew Morton 
> Cc: Michal Hocko 
> Cc: Oscar Salvador 
> Cc: "Rafael J. Wysocki" 
> Cc: Baoquan He 
> Cc: Wei Yang 
> Signed-off-by: David Hildenbrand 
> ---
>  mm/memory_hotplug.c | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index da6aab272c9b..e21a7d53ade5 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -71,7 +71,6 @@ bool memhp_auto_online;
>  #else
>  bool memhp_auto_online = true;
>  #endif
> -EXPORT_SYMBOL_GPL(memhp_auto_online);
>
>  static int __init setup_memhp_default_state(char *str)
>  {
> --
> 2.24.1

Acked-by: Pankaj Gupta 
>
>


Re: [PATCH v3 3/8] drivers/base/memory: store mapping between MMOP_* and string in an array

2020-03-19 Thread Pankaj Gupta
> Let's use a simple array which we can reuse soon. While at it, move the
> string->mmop conversion out of the device hotplug lock.
>
> Reviewed-by: Wei Yang 
> Acked-by: Michal Hocko 
> Cc: Greg Kroah-Hartman 
> Cc: Andrew Morton 
> Cc: Michal Hocko 
> Cc: Oscar Salvador 
> Cc: "Rafael J. Wysocki" 
> Cc: Baoquan He 
> Cc: Wei Yang 
> Signed-off-by: David Hildenbrand 
> ---
>  drivers/base/memory.c | 38 +++---
>  1 file changed, 23 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> index e7e77cafef80..8a7f29c0bf97 100644
> --- a/drivers/base/memory.c
> +++ b/drivers/base/memory.c
> @@ -28,6 +28,24 @@
>
>  #define MEMORY_CLASS_NAME  "memory"
>
> +static const char *const online_type_to_str[] = {
> +   [MMOP_OFFLINE] = "offline",
> +   [MMOP_ONLINE] = "online",
> +   [MMOP_ONLINE_KERNEL] = "online_kernel",
> +   [MMOP_ONLINE_MOVABLE] = "online_movable",
> +};
> +
> +static int memhp_online_type_from_str(const char *str)
> +{
> +   int i;
> +
> +   for (i = 0; i < ARRAY_SIZE(online_type_to_str); i++) {
> +   if (sysfs_streq(str, online_type_to_str[i]))
> +   return i;
> +   }
> +   return -EINVAL;
> +}
> +
>  #define to_memory_block(dev) container_of(dev, struct memory_block, dev)
>
>  static int sections_per_block;
> @@ -236,26 +254,17 @@ static int memory_subsys_offline(struct device *dev)
>  static ssize_t state_store(struct device *dev, struct device_attribute *attr,
>const char *buf, size_t count)
>  {
> +   const int online_type = memhp_online_type_from_str(buf);
> struct memory_block *mem = to_memory_block(dev);
> -   int ret, online_type;
> +   int ret;
> +
> +   if (online_type < 0)
> +   return -EINVAL;
>
> ret = lock_device_hotplug_sysfs();
> if (ret)
> return ret;
>
> -   if (sysfs_streq(buf, "online_kernel"))
> -   online_type = MMOP_ONLINE_KERNEL;
> -   else if (sysfs_streq(buf, "online_movable"))
> -   online_type = MMOP_ONLINE_MOVABLE;
> -   else if (sysfs_streq(buf, "online"))
> -   online_type = MMOP_ONLINE;
> -   else if (sysfs_streq(buf, "offline"))
> -   online_type = MMOP_OFFLINE;
> -   else {
> -   ret = -EINVAL;
> -   goto err;
> -   }
> -
> switch (online_type) {
> case MMOP_ONLINE_KERNEL:
> case MMOP_ONLINE_MOVABLE:
> @@ -271,7 +280,6 @@ static ssize_t state_store(struct device *dev, struct 
> device_attribute *attr,
> ret = -EINVAL; /* should never happen */
> }
>
> -err:
> unlock_device_hotplug();
>
> if (ret < 0)
> --

Nice cleanup patch.
Acked-by: Pankaj Gupta 

> 2.24.1
>
>


Re: [PATCH v3 2/8] drivers/base/memory: map MMOP_OFFLINE to 0

2020-03-19 Thread Pankaj Gupta
> Historically, we used the value -1. Just treat 0 as the special
> case now. Clarify a comment (which was wrong, when we come via
> device_online() the first time, the online_type would have been 0 /
> MEM_ONLINE). The default is now always MMOP_OFFLINE. This removes the
> last user of the manual "-1", which didn't use the enum value.
>
> This is a preparation to use the online_type as an array index.
>
> Reviewed-by: Wei Yang 
> Acked-by: Michal Hocko 
> Cc: Greg Kroah-Hartman 
> Cc: Andrew Morton 
> Cc: Michal Hocko 
> Cc: Oscar Salvador 
> Cc: "Rafael J. Wysocki" 
> Cc: Baoquan He 
> Cc: Wei Yang 
> Signed-off-by: David Hildenbrand 
> ---
>  drivers/base/memory.c  | 11 ---
>  include/linux/memory_hotplug.h |  2 +-
>  2 files changed, 5 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> index 8c5ce42c0fc3..e7e77cafef80 100644
> --- a/drivers/base/memory.c
> +++ b/drivers/base/memory.c
> @@ -211,17 +211,14 @@ static int memory_subsys_online(struct device *dev)
> return 0;
>
> /*
> -* If we are called from state_store(), online_type will be
> -* set >= 0 Otherwise we were called from the device online
> -* attribute and need to set the online_type.
> +* When called via device_online() without configuring the 
> online_type,
> +* we want to default to MMOP_ONLINE.
>  */
> -   if (mem->online_type < 0)
> +   if (mem->online_type == MMOP_OFFLINE)
> mem->online_type = MMOP_ONLINE;
>
> ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
> -
> -   /* clear online_type */
> -   mem->online_type = -1;
> +   mem->online_type = MMOP_OFFLINE;
>
> return ret;
>  }
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 3aaf00db224c..76f3c617a8ab 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -48,7 +48,7 @@ enum {
>  /* Types for control the zone type of onlined and offlined memory */
>  enum {
> /* Offline the memory. */
> -   MMOP_OFFLINE = -1,
> +   MMOP_OFFLINE = 0,
> /* Online the memory. Zone depends, see default_zone_for_pfn(). */
> MMOP_ONLINE,
> /* Online the memory to ZONE_NORMAL. */
> --
> 2.24.1

Looks good to me.
Acked-by: Pankaj Gupta 
>
>


Re: [PATCH v3 1/8] drivers/base/memory: rename MMOP_ONLINE_KEEP to MMOP_ONLINE

2020-03-19 Thread Pankaj Gupta
> The name is misleading and it's not really clear what is "kept". Let's just
> name it like the online_type name we expose to user space ("online").
>
> Add some documentation to the types.
>
> Reviewed-by: Wei Yang 
> Cc: Greg Kroah-Hartman 
> Cc: Andrew Morton 
> Cc: Michal Hocko 
> Cc: Oscar Salvador 
> Cc: "Rafael J. Wysocki" 
> Cc: Baoquan He 
> Cc: Wei Yang 
> Signed-off-by: David Hildenbrand 
> ---
>  drivers/base/memory.c  | 9 +
>  include/linux/memory_hotplug.h | 6 +-
>  2 files changed, 10 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> index 6448c9ece2cb..8c5ce42c0fc3 100644
> --- a/drivers/base/memory.c
> +++ b/drivers/base/memory.c
> @@ -216,7 +216,7 @@ static int memory_subsys_online(struct device *dev)
>  * attribute and need to set the online_type.
>  */
> if (mem->online_type < 0)
> -   mem->online_type = MMOP_ONLINE_KEEP;
> +   mem->online_type = MMOP_ONLINE;
>
> ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
>
> @@ -251,7 +251,7 @@ static ssize_t state_store(struct device *dev, struct 
> device_attribute *attr,
> else if (sysfs_streq(buf, "online_movable"))
> online_type = MMOP_ONLINE_MOVABLE;
> else if (sysfs_streq(buf, "online"))
> -   online_type = MMOP_ONLINE_KEEP;
> +   online_type = MMOP_ONLINE;
> else if (sysfs_streq(buf, "offline"))
> online_type = MMOP_OFFLINE;
> else {
> @@ -262,7 +262,7 @@ static ssize_t state_store(struct device *dev, struct 
> device_attribute *attr,
> switch (online_type) {
> case MMOP_ONLINE_KERNEL:
> case MMOP_ONLINE_MOVABLE:
> -   case MMOP_ONLINE_KEEP:
> +   case MMOP_ONLINE:
> /* mem->online_type is protected by device_hotplug_lock */
> mem->online_type = online_type;
> ret = device_online(&mem->dev);
> @@ -342,7 +342,8 @@ static ssize_t valid_zones_show(struct device *dev,
> }
>
> nid = mem->nid;
> -   default_zone = zone_for_pfn_range(MMOP_ONLINE_KEEP, nid, start_pfn, 
> nr_pages);
> +   default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, start_pfn,
> + nr_pages);
> strcat(buf, default_zone->name);
>
> print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL,
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 3195d11876ea..3aaf00db224c 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -47,9 +47,13 @@ enum {
>
>  /* Types for control the zone type of onlined and offlined memory */
>  enum {
> +   /* Offline the memory. */
> MMOP_OFFLINE = -1,
> -   MMOP_ONLINE_KEEP,
> +   /* Online the memory. Zone depends, see default_zone_for_pfn(). */
> +   MMOP_ONLINE,
> +   /* Online the memory to ZONE_NORMAL. */
> MMOP_ONLINE_KERNEL,
> +   /* Online the memory to ZONE_MOVABLE. */
> MMOP_ONLINE_MOVABLE,
>  };
>
> --
Looks good to me.

Acked-by: Pankaj Gupta 

> 2.24.1
>
>


Re: [PATCH 1/2] libnvdimm/altmap: Track namespace boundaries in altmap

2019-09-10 Thread Pankaj Gupta
gt;   struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
>   resource_size_t base = nsio->res.start + start_pad;
> + resource_size_t end = nsio->res.end - end_trunc;
>   struct vmem_altmap __altmap = {
>   .base_pfn = init_altmap_base(base),
>   .reserve = init_altmap_reserve(base),
> + .end_pfn = PHYS_PFN(end),
>   };
>  
>   memcpy(res, &nsio->res, sizeof(*res));
> diff --git a/include/linux/memremap.h b/include/linux/memremap.h
> index f8a5b2a19945..c70996fe48c8 100644
> --- a/include/linux/memremap.h
> +++ b/include/linux/memremap.h
> @@ -17,6 +17,7 @@ struct device;
>   */
>  struct vmem_altmap {
>   const unsigned long base_pfn;
> + const unsigned long end_pfn;
>   const unsigned long reserve;
>   unsigned long free;
>   unsigned long align;
> --
> 2.21.0

This patch looks good to me. It helps to prevent
namespace access across boundaries for altmap
hugepage allocation.

Reviewed-by: Pankaj Gupta 

Thanks,
Pankaj
> 
> ___
> Linux-nvdimm mailing list
> linux-nvd...@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
>