On 10/17/2018 2:18 AM, Michal Hocko wrote:
On Mon 15-10-18 13:27:16, Alexander Duyck wrote:
This patch combines the hotplug-related bits of memmap_init_zone and
memmap_init_zone_device into a single function called
__memmap_init_hotplug.

I also took the opportunity to integrate the functionality of
__init_single_page into this function. In doing so I can get rid of some
of the redundancy, such as initializing the LRU pointers versus the pgmap
pointer.
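
For reference, a rough sketch of the call structure this ends up with
(signatures are taken from the patch below; bodies elided):

/* shared helper: initializes one pageblock-aligned run of pages */
static void __meminit __init_pageblock(unsigned long start_pfn,
                                       unsigned long nr_pages,
                                       unsigned long zone, int nid,
                                       struct dev_pagemap *pgmap,
                                       bool is_reserved);

/* walks an arbitrary range one pageblock at a time */
static void __meminit __memmap_init_hotplug(unsigned long size, int nid,
                                            unsigned long zone,
                                            unsigned long start_pfn,
                                            struct dev_pagemap *pgmap);

/* memmap_init_zone(), MEMMAP_HOTPLUG case: pgmap == NULL since these
 * pages only back the memory map itself */
__memmap_init_hotplug(end_pfn - start_pfn, nid, zone, start_pfn, NULL);

/* memmap_init_zone_device(): ZONE_DEVICE pages get the pgmap pointer */
__memmap_init_hotplug(size, nid, zone_idx, start_pfn, pgmap);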

This patch depends on [1], which I've had some concerns about; it adds
more code on top. I am still not convinced this is the right direction.

[1] http://lkml.kernel.org/r/20180925202053.3576.66039.stgit@localhost.localdomain

I wouldn't say this patch depends on it. I could easily make this patch work with the code as it was before that change. This was actually something I had started on first and then decided to defer, since I knew it was more invasive than the memmap_init_zone_device change was.

With that said, I am also wondering whether a possible solution to your concerns would be to export the __init_pageblock function later and move the call to memmap_init_zone_device out into the memremap or hotplug code once Dan gets the refactoring for HMM and memremap sorted out. Something along the lines of the sketch below:
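
(A sketch only; where the call ends up living and what gets exported are
assumptions on my part, not something this patch does.)

/* mm/page_alloc.c: make the per-pageblock initializer visible to the
 * memremap side, e.g. via a declaration in include/linux/mm.h */
void __meminit __init_pageblock(unsigned long start_pfn,
                                unsigned long nr_pages,
                                unsigned long zone, int nid,
                                struct dev_pagemap *pgmap,
                                bool is_reserved);

/* kernel/memremap.c (or wherever the HMM/memremap rework lands): drive
 * the ZONE_DEVICE struct page init directly, outside the hotplug lock.
 * Simplified; ignores the altmap reservation handling. */
unsigned long pfn = start_pfn;

while (pfn < end_pfn) {
        unsigned long block_end = min(ALIGN(pfn + 1, pageblock_nr_pages),
                                      end_pfn);

        __init_pageblock(pfn, block_end - pfn, zone_idx, nid, pgmap, true);
        cond_resched();
        pfn = block_end;
}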


Signed-off-by: Alexander Duyck <alexander.h.du...@linux.intel.com>
---
  mm/page_alloc.c |  232 ++++++++++++++++++++++++++++++++++++++-----------------
  1 file changed, 159 insertions(+), 73 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 20e9eb35d75d..92375e7867ba 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1192,6 +1192,94 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,
  #endif
  }
+static void __meminit __init_pageblock(unsigned long start_pfn,
+                                      unsigned long nr_pages,
+                                      unsigned long zone, int nid,
+                                      struct dev_pagemap *pgmap,
+                                      bool is_reserved)
+{
+       unsigned long nr_pgmask = pageblock_nr_pages - 1;
+       struct page *start_page = pfn_to_page(start_pfn);
+       unsigned long pfn = start_pfn + nr_pages - 1;
+#ifdef WANT_PAGE_VIRTUAL
+       bool is_highmem = is_highmem_idx(zone);
+#endif
+       struct page *page;
+
+       /*
+        * Enforce the following requirements:
+        * size > 0
+        * size <= pageblock_nr_pages
+        * start_pfn -> pfn does not cross pageblock_nr_pages boundary
+        */
+       VM_BUG_ON(((start_pfn ^ pfn) | (nr_pages - 1)) > nr_pgmask);
+
+       /*
+        * Work from highest page to lowest, this way we will still be
+        * warm in the cache when we call set_pageblock_migratetype
+        * below.
+        *
+        * The loop is based around the page pointer as the main index
+        * instead of the pfn because pfn is not used inside the loop if
+        * the section number is not in page flags and WANT_PAGE_VIRTUAL
+        * is not defined.
+        */
+       for (page = start_page + nr_pages; page-- != start_page; pfn--) {
+               mm_zero_struct_page(page);
+
+               /*
+                * We use the start_pfn instead of pfn in the set_page_links
+                * call because of the fact that the pfn number is used to
+                * get the section_nr and this function should not be
+                * spanning more than a single section.
+                */
+               set_page_links(page, zone, nid, start_pfn);
+               init_page_count(page);
+               page_mapcount_reset(page);
+               page_cpupid_reset_last(page);
+
+               /*
+                * We can use the non-atomic __set_bit operation for setting
+                * the flag as we are still initializing the pages.
+                */
+               if (is_reserved)
+                       __SetPageReserved(page);
+
+               /*
+                * ZONE_DEVICE pages union ->lru with a ->pgmap back
+                * pointer and hmm_data.  It is a bug if a ZONE_DEVICE
+                * page is ever freed or placed on a driver-private list.
+                */
+               page->pgmap = pgmap;
+               if (!pgmap)
+                       INIT_LIST_HEAD(&page->lru);
+
+#ifdef WANT_PAGE_VIRTUAL
+               /* The shift won't overflow because ZONE_NORMAL is below 4G. */
+               if (!is_highmem)
+                       set_page_address(page, __va(pfn << PAGE_SHIFT));
+#endif
+       }
+
+       /*
+        * Mark the block movable so that blocks are reserved for
+        * movable at startup. This will force kernel allocations
+        * to reserve their blocks rather than leaking throughout
+        * the address space during boot when many long-lived
+        * kernel allocations are made.
+        *
+        * bitmap is created for zone's valid pfn range. but memmap
+        * can be created for invalid pages (for alignment)
+        * check here not to call set_pageblock_migratetype() against
+        * pfn out of zone.
+        *
+        * Please note that MEMMAP_HOTPLUG path doesn't clear memmap
+        * because this is done early in sparse_add_one_section
+        */
+       if (!(start_pfn & nr_pgmask))
+               set_pageblock_migratetype(start_page, MIGRATE_MOVABLE);
+}
+
  #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
  static void __meminit init_reserved_page(unsigned long pfn)
  {
@@ -5513,6 +5601,36 @@ void __ref build_all_zonelists(pg_data_t *pgdat)
        return false;
  }
+static void __meminit __memmap_init_hotplug(unsigned long size, int nid,
+                                           unsigned long zone,
+                                           unsigned long start_pfn,
+                                           struct dev_pagemap *pgmap)
+{
+       unsigned long pfn = start_pfn + size;
+
+       while (pfn != start_pfn) {
+               unsigned long stride = pfn;
+
+               pfn = max(ALIGN_DOWN(pfn - 1, pageblock_nr_pages), start_pfn);
+               stride -= pfn;
+
+               /*
+                * The last argument of __init_pageblock is a boolean
+                * value indicating if the page will be marked as reserved.
+                *
+                * Mark page reserved as it will need to wait for onlining
+                * phase for it to be fully associated with a zone.
+                *
+                * Under certain circumstances ZONE_DEVICE pages may not
+                * need to be marked as reserved, however there is still
+                * code that is depending on this being set for now.
+                */
+               __init_pageblock(pfn, stride, zone, nid, pgmap, true);
+
+               cond_resched();
+       }
+}
+
  /*
   * Initially all pages are reserved - free ones are freed
   * up by memblock_free_all() once the early boot process is
@@ -5523,51 +5641,61 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                struct vmem_altmap *altmap)
  {
        unsigned long pfn, end_pfn = start_pfn + size;
-       struct page *page;

        if (highest_memmap_pfn < end_pfn - 1)
                highest_memmap_pfn = end_pfn - 1;

+       if (context == MEMMAP_HOTPLUG) {
  #ifdef CONFIG_ZONE_DEVICE
-       /*
-        * Honor reservation requested by the driver for this ZONE_DEVICE
-        * memory. We limit the total number of pages to initialize to just
-        * those that might contain the memory mapping. We will defer the
-        * ZONE_DEVICE page initialization until after we have released
-        * the hotplug lock.
-        */
-       if (zone == ZONE_DEVICE) {
-               if (!altmap)
-                       return;
+               /*
+                * Honor reservation requested by the driver for this
+                * ZONE_DEVICE memory. We limit the total number of pages to
+                * initialize to just those that might contain the memory
+                * mapping. We will defer the ZONE_DEVICE page initialization
+                * until after we have released the hotplug lock.
+                */
+               if (zone == ZONE_DEVICE) {
+                       if (!altmap)
+                               return;
+
+                       if (start_pfn == altmap->base_pfn)
+                               start_pfn += altmap->reserve;
+                       end_pfn = altmap->base_pfn +
+                                 vmem_altmap_offset(altmap);
+               }
+#endif
+               /*
+                * For these ZONE_DEVICE pages we don't need to record the
+                * pgmap as they should represent only those pages used to
+                * store the memory map. The actual ZONE_DEVICE pages will
+                * be initialized later.
+                */
+               __memmap_init_hotplug(end_pfn - start_pfn, nid, zone,
+                                     start_pfn, NULL);

-               if (start_pfn == altmap->base_pfn)
-                       start_pfn += altmap->reserve;
-               end_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
+               return;
        }
-#endif

        for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+               struct page *page;
+
                /*
                 * There can be holes in boot-time mem_map[]s handed to this
                 * function.  They do not exist on hotplugged memory.
                 */
-               if (context == MEMMAP_EARLY) {
-                       if (!early_pfn_valid(pfn)) {
-                               pfn = next_valid_pfn(pfn) - 1;
-                               continue;
-                       }
-                       if (!early_pfn_in_nid(pfn, nid))
-                               continue;
-                       if (overlap_memmap_init(zone, &pfn))
-                               continue;
-                       if (defer_init(nid, pfn, end_pfn))
-                               break;
+               if (!early_pfn_valid(pfn)) {
+                       pfn = next_valid_pfn(pfn) - 1;
+                       continue;
                }
+               if (!early_pfn_in_nid(pfn, nid))
+                       continue;
+               if (overlap_memmap_init(zone, &pfn))
+                       continue;
+               if (defer_init(nid, pfn, end_pfn))
+                       break;

                page = pfn_to_page(pfn);
                __init_single_page(page, pfn, zone, nid);
-               if (context == MEMMAP_HOTPLUG)
-                       __SetPageReserved(page);

                /*
                 * Mark the block movable so that blocks are reserved for
@@ -5594,14 +5722,12 @@ void __ref memmap_init_zone_device(struct zone *zone,
                                   unsigned long size,
                                   struct dev_pagemap *pgmap)
  {
-       unsigned long pfn, end_pfn = start_pfn + size;
        struct pglist_data *pgdat = zone->zone_pgdat;
        unsigned long zone_idx = zone_idx(zone);
        unsigned long start = jiffies;
        int nid = pgdat->node_id;

-       if (WARN_ON_ONCE(!pgmap || !is_dev_zone(zone)))
-               return;
+       VM_BUG_ON(!is_dev_zone(zone));

        /*
         * The call to memmap_init_zone should have already taken care
@@ -5610,53 +5736,13 @@ void __ref memmap_init_zone_device(struct zone *zone,
         */
        if (pgmap->altmap_valid) {
                struct vmem_altmap *altmap = &pgmap->altmap;
+               unsigned long end_pfn = start_pfn + size;

                start_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
                size = end_pfn - start_pfn;
        }

-       for (pfn = start_pfn; pfn < end_pfn; pfn++) {
-               struct page *page = pfn_to_page(pfn);
-
-               __init_single_page(page, pfn, zone_idx, nid);
-
-               /*
-                * Mark page reserved as it will need to wait for onlining
-                * phase for it to be fully associated with a zone.
-                *
-                * We can use the non-atomic __set_bit operation for setting
-                * the flag as we are still initializing the pages.
-                */
-               __SetPageReserved(page);
-
-               /*
-                * ZONE_DEVICE pages union ->lru with a ->pgmap back
-                * pointer and hmm_data.  It is a bug if a ZONE_DEVICE
-                * page is ever freed or placed on a driver-private list.
-                */
-               page->pgmap = pgmap;
-               page->hmm_data = 0;
-
-               /*
-                * Mark the block movable so that blocks are reserved for
-                * movable at startup. This will force kernel allocations
-                * to reserve their blocks rather than leaking throughout
-                * the address space during boot when many long-lived
-                * kernel allocations are made.
-                *
-                * bitmap is created for zone's valid pfn range. but memmap
-                * can be created for invalid pages (for alignment)
-                * check here not to call set_pageblock_migratetype() against
-                * pfn out of zone.
-                *
-                * Please note that MEMMAP_HOTPLUG path doesn't clear memmap
-                * because this is done early in sparse_add_one_section
-                */
-               if (!(pfn & (pageblock_nr_pages - 1))) {
-                       set_pageblock_migratetype(page, MIGRATE_MOVABLE);
-                       cond_resched();
-               }
-       }
+       __memmap_init_hotplug(size, nid, zone_idx, start_pfn, pgmap);

        pr_info("%s initialised, %lu pages in %ums\n", dev_name(pgmap->dev),
                size, jiffies_to_msecs(jiffies - start));
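
As a sanity check on the stride math: the descending walk in
__memmap_init_hotplug() should never hand __init_pageblock() a run that
crosses a pageblock boundary, which is what the VM_BUG_ON() there asserts.
A standalone userspace illustration of the arithmetic (pageblock_nr_pages
is assumed to be 512 here, as on x86-64 with 2MB pageblocks):

#include <stdio.h>

#define PAGEBLOCK_NR_PAGES 512UL                /* assumption for the example */
#define ALIGN_DOWN(x, a)   ((x) & ~((a) - 1))
#define MAX(a, b)          ((a) > (b) ? (a) : (b))

int main(void)
{
        unsigned long start_pfn = 0x1010;       /* deliberately unaligned */
        unsigned long nr_pages = 0x1234;
        unsigned long pfn = start_pfn + nr_pages;

        while (pfn != start_pfn) {
                unsigned long stride = pfn;

                pfn = MAX(ALIGN_DOWN(pfn - 1, PAGEBLOCK_NR_PAGES), start_pfn);
                stride -= pfn;

                /* every run satisfies the __init_pageblock() VM_BUG_ON() */
                printf("init [%#lx, %#lx), %#lx pages, one pageblock: %d\n",
                       pfn, pfn + stride, stride,
                       ALIGN_DOWN(pfn, PAGEBLOCK_NR_PAGES) ==
                       ALIGN_DOWN(pfn + stride - 1, PAGEBLOCK_NR_PAGES));
        }
        return 0;
}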

