On Tue 21-06-16 15:15:51, Mel Gorman wrote:
> Earlier patches focused on having direct reclaim and kswapd use
> node-centric data for reclaiming, but shrink_node() itself still uses too
> much zone information. This patch removes the unnecessary zone-based
> information, with the most important decision being whether to continue
> reclaim or not. Some memcg APIs are adjusted as a result, even though
> memcg itself still uses some zone information.
> 
> Signed-off-by: Mel Gorman <mgor...@techsingularity.net>

Acked-by: Michal Hocko <mho...@suse.com>

> ---
>  include/linux/memcontrol.h |  9 +++----
>  include/linux/mmzone.h     |  4 ++--
>  include/linux/swap.h       |  2 +-
>  mm/memcontrol.c            | 17 +++++++-------
>  mm/page_alloc.c            |  2 +-
>  mm/vmscan.c                | 58 ++++++++++++++++++++++++++--------------------
>  mm/workingset.c            |  6 ++---
>  7 files changed, 54 insertions(+), 44 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index cda436c79d8c..a13328851fea 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -306,7 +306,8 @@ void mem_cgroup_uncharge_list(struct list_head *page_list);
>  
>  void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
>  
> -struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
> +struct lruvec *mem_cgroup_lruvec(struct pglist_data *, struct zone *zone,
> +                              struct mem_cgroup *);
>  struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
>  
>  bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
> @@ -573,10 +574,10 @@ static inline void mem_cgroup_migrate(struct page *old, struct page *new)
>  {
>  }
>  
> -static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
> -                                                 struct mem_cgroup *memcg)
> +static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
> +                             struct zone *zone, struct mem_cgroup *memcg)
>  {
> -     return zone_lruvec(zone);
> +     return node_lruvec(pgdat);
>  }
>  
>  static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
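
The caller-side conversion is mechanical, since every zone carries a
zone_pgdat back-pointer. A minimal sketch, assuming only the new API above
(the helper name is hypothetical, not part of this patch):

	static struct lruvec *zone_memcg_lruvec(struct zone *zone,
						struct mem_cgroup *memcg)
	{
		/* old API: return mem_cgroup_zone_lruvec(zone, memcg); */
		return mem_cgroup_lruvec(zone->zone_pgdat, zone, memcg);
	}

This matches what the workingset.c hunks below end up doing inline.
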
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 890d1858aa22..6991eded0ffd 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -737,9 +737,9 @@ static inline spinlock_t *zone_lru_lock(struct zone *zone)
>       return &zone->zone_pgdat->lru_lock;
>  }
>  
> -static inline struct lruvec *zone_lruvec(struct zone *zone)
> +static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
>  {
> -     return &zone->zone_pgdat->lruvec;
> +     return &pgdat->lruvec;
>  }
>  
>  static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 916e2eddecd6..0ad616d7c381 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -316,7 +316,7 @@ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
>                                                 unsigned long nr_pages,
>                                                 gfp_t gfp_mask,
>                                                 bool may_swap);
> -extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
> +extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
>                                               gfp_t gfp_mask, bool noswap,
>                                               struct zone *zone,
>                                               unsigned long *nr_scanned);
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 864a4e3a82c1..aac5fae56ea4 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -944,22 +944,23 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
>            iter = mem_cgroup_iter(NULL, iter, NULL))
>  
>  /**
> - * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
> + * mem_cgroup_lruvec - get the lru list vector for a node or a memcg zone
> + * @pgdat: node of the wanted lruvec
>   * @zone: zone of the wanted lruvec
>   * @memcg: memcg of the wanted lruvec
>   *
> - * Returns the lru list vector holding pages for the given @zone and
> - * @mem.  This can be the global zone lruvec, if the memory controller
> + * Returns the lru list vector holding pages for a given @node or a given
> + * @memcg and @zone. This can be the node lruvec, if the memory controller
>   * is disabled.
>   */
> -struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
> -                                   struct mem_cgroup *memcg)
> +struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
> +                              struct zone *zone, struct mem_cgroup *memcg)
>  {
>       struct mem_cgroup_per_zone *mz;
>       struct lruvec *lruvec;
>  
>       if (mem_cgroup_disabled()) {
> -             lruvec = zone_lruvec(zone);
> +             lruvec = node_lruvec(pgdat);
>               goto out;
>       }
>  
> @@ -1474,8 +1475,8 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
>                       }
>                       continue;
>               }
> -             total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
> -                                                  zone, &nr_scanned);
> +             total += mem_cgroup_shrink_node(victim, gfp_mask, false,
> +                                     zone, &nr_scanned);
>               *total_scanned += nr_scanned;
>               if (!soft_limit_excess(root_memcg))
>                       break;
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index e128af8de05f..d62b147fd426 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -5897,6 +5897,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
>  #endif
>       pgdat_page_ext_init(pgdat);
>       spin_lock_init(&pgdat->lru_lock);
> +     lruvec_init(node_lruvec(pgdat));
>  
>       for (j = 0; j < MAX_NR_ZONES; j++) {
>               struct zone *zone = pgdat->node_zones + j;
> @@ -5959,7 +5960,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
>               /* For bootup, initialized properly in watermark setup */
>               mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages);
>  
> -             lruvec_init(zone_lruvec(zone));
>               if (!size)
>                       continue;
>  
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index d42a86e603e8..3774ebf19f63 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2220,10 +2220,11 @@ static inline void init_tlb_ubc(void)
>  /*
>   * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
>   */
> -static void shrink_zone_memcg(struct zone *zone, struct mem_cgroup *memcg,
> +static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memcg,
>                             struct scan_control *sc, unsigned long *lru_pages)
>  {
> -     struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
> +     struct zone *zone = &pgdat->node_zones[sc->reclaim_idx];
> +     struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, zone, memcg);
>       unsigned long nr[NR_LRU_LISTS];
>       unsigned long targets[NR_LRU_LISTS];
>       unsigned long nr_to_scan;
> @@ -2356,13 +2357,14 @@ static bool in_reclaim_compaction(struct scan_control *sc)
>   * calls try_to_compact_zone() that it will have enough free pages to succeed.
>   * It will give up earlier than that if there is difficulty reclaiming pages.
>   */
> -static inline bool should_continue_reclaim(struct zone *zone,
> +static inline bool should_continue_reclaim(struct pglist_data *pgdat,
>                                       unsigned long nr_reclaimed,
>                                       unsigned long nr_scanned,
>                                       struct scan_control *sc)
>  {
>       unsigned long pages_for_compaction;
>       unsigned long inactive_lru_pages;
> +     int z;
>  
>       /* If not in reclaim/compaction mode, stop */
>       if (!in_reclaim_compaction(sc))
> @@ -2396,21 +2398,27 @@ static inline bool should_continue_reclaim(struct zone *zone,
>        * inactive lists are large enough, continue reclaiming
>        */
>       pages_for_compaction = (2UL << sc->order);
> -     inactive_lru_pages = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE);
> +     inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
>       if (get_nr_swap_pages() > 0)
> -             inactive_lru_pages += node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON);
> +             inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
>       if (sc->nr_reclaimed < pages_for_compaction &&
>                       inactive_lru_pages > pages_for_compaction)
>               return true;
>  
>       /* If compaction would go ahead or the allocation would succeed, stop */
> -     switch (compaction_suitable(zone, sc->order, 0, 0)) {
> -     case COMPACT_PARTIAL:
> -     case COMPACT_CONTINUE:
> -             return false;
> -     default:
> -             return true;
> +     for (z = 0; z <= sc->reclaim_idx; z++) {
> +             struct zone *zone = &pgdat->node_zones[z];
> +
> +             switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) {
> +             case COMPACT_PARTIAL:
> +             case COMPACT_CONTINUE:
> +                     return false;
> +             default:
> +                     /* check next zone */
> +                     ;
> +             }
>       }
> +     return true;
>  }
>  
>  static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
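
The loop above is the real behavioural change in this hunk: rather than
asking whether the one target zone is compaction-ready, reclaim now
continues until *some* zone eligible for the allocation (0..sc->reclaim_idx)
is ready. Restated as a standalone predicate just to spell the logic out
(hypothetical helper name, same calls as the hunk above):

	static bool node_compaction_ready(pg_data_t *pgdat, struct scan_control *sc)
	{
		int z;

		for (z = 0; z <= sc->reclaim_idx; z++) {
			struct zone *zone = &pgdat->node_zones[z];

			switch (compaction_suitable(zone, sc->order, 0,
						    sc->reclaim_idx)) {
			case COMPACT_PARTIAL:
			case COMPACT_CONTINUE:
				return true;	/* stop reclaiming, go compact */
			default:
				break;		/* not this zone, check the next */
			}
		}
		return false;			/* no zone ready, keep reclaiming */
	}

should_continue_reclaim() effectively returns !node_compaction_ready() at
that point, which is what the open-coded loop implements.
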
> @@ -2419,15 +2427,14 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
>       struct reclaim_state *reclaim_state = current->reclaim_state;
>       unsigned long nr_reclaimed, nr_scanned;
>       bool reclaimable = false;
> -     struct zone *zone = &pgdat->node_zones[classzone_idx];
>  
>       do {
>               struct mem_cgroup *root = sc->target_mem_cgroup;
>               struct mem_cgroup_reclaim_cookie reclaim = {
> -                     .zone = zone,
> +                     .zone = &pgdat->node_zones[classzone_idx],
>                       .priority = sc->priority,
>               };
> -             unsigned long zone_lru_pages = 0;
> +             unsigned long node_lru_pages = 0;
>               struct mem_cgroup *memcg;
>  
>               nr_reclaimed = sc->nr_reclaimed;
> @@ -2448,11 +2455,11 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
>                       reclaimed = sc->nr_reclaimed;
>                       scanned = sc->nr_scanned;
>  
> -                     shrink_zone_memcg(zone, memcg, sc, &lru_pages);
> -                     zone_lru_pages += lru_pages;
> +                     shrink_node_memcg(pgdat, memcg, sc, &lru_pages);
> +                     node_lru_pages += lru_pages;
>  
>                       if (!global_reclaim(sc) && sc->reclaim_idx == classzone_idx)
> -                             shrink_slab(sc->gfp_mask, zone_to_nid(zone),
> +                             shrink_slab(sc->gfp_mask, pgdat->node_id,
>                                           memcg, sc->nr_scanned - scanned,
>                                           lru_pages);
>  
> @@ -2464,7 +2471,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
>                       /*
>                        * Direct reclaim and kswapd have to scan all memory
>                        * cgroups to fulfill the overall scan target for the
> -                      * zone.
> +                      * node.
>                        *
>                        * Limit reclaim, on the other hand, only cares about
>                        * nr_to_reclaim pages to be reclaimed and it will
> @@ -2483,9 +2490,9 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
>                * the eligible LRU pages were scanned.
>                */
>               if (global_reclaim(sc) && sc->reclaim_idx == classzone_idx)
> -                     shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL,
> +                     shrink_slab(sc->gfp_mask, pgdat->node_id, NULL,
>                                   sc->nr_scanned - nr_scanned,
> -                                 zone_lru_pages);
> +                                 node_lru_pages);
>  
>               if (reclaim_state) {
>                       sc->nr_reclaimed += reclaim_state->reclaimed_slab;
> @@ -2500,7 +2507,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
>               if (sc->nr_reclaimed - nr_reclaimed)
>                       reclaimable = true;
>  
> -     } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
> +     } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
>                                        sc->nr_scanned - nr_scanned, sc));
>  
>       return reclaimable;
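
For readers following the control flow, the shape of shrink_node() after
this patch is roughly the following. This is a condensed paraphrase of the
hunks above, not new code; accounting and the limit-reclaim early exit are
elided:

	do {
		memcg = mem_cgroup_iter(root, NULL, &reclaim);
		do {
			shrink_node_memcg(pgdat, memcg, sc, &lru_pages);
			node_lru_pages += lru_pages;
		} while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));

		if (global_reclaim(sc) && sc->reclaim_idx == classzone_idx)
			shrink_slab(sc->gfp_mask, pgdat->node_id, NULL,
				    sc->nr_scanned - nr_scanned, node_lru_pages);
	} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
					 sc->nr_scanned - nr_scanned, sc));

Everything is now keyed by pgdat and reclaim_idx; the only zone left is the
one cached for the memcg reclaim cookie.
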
> @@ -2896,7 +2903,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
>  
>  #ifdef CONFIG_MEMCG
>  
> -unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
> +unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
>                                               gfp_t gfp_mask, bool noswap,
>                                               struct zone *zone,
>                                               unsigned long *nr_scanned)
> @@ -2906,6 +2913,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
>               .target_mem_cgroup = memcg,
>               .may_writepage = !laptop_mode,
>               .may_unmap = 1,
> +             .reclaim_idx = zone_idx(zone),
>               .may_swap = !noswap,
>       };
>       unsigned long lru_pages;
> @@ -2920,11 +2928,11 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
>       /*
>        * NOTE: Although we can get the priority field, using it
>        * here is not a good idea, since it limits the pages we can scan.
> -      * if we don't reclaim here, the shrink_zone from balance_pgdat
> +      * if we don't reclaim here, the shrink_node from balance_pgdat
>        * will pick up pages from other mem cgroup's as well. We hack
>        * the priority and make it zero.
>        */
> -     shrink_zone_memcg(zone, memcg, &sc, &lru_pages);
> +     shrink_node_memcg(zone->zone_pgdat, memcg, &sc, &lru_pages);
>  
>       trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
>  
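
One thing worth calling out: mem_cgroup_shrink_node() is still zone-scoped
at its call site, so the zone is translated into node-centric terms in two
steps, reclaim_idx via zone_idx() and the pgdat via the back-pointer. As a
sketch (hypothetical wrapper, same calls as the hunks above):

	static void shrink_memcg_for_zone(struct zone *zone, struct mem_cgroup *memcg,
					  struct scan_control *sc,
					  unsigned long *lru_pages)
	{
		sc->reclaim_idx = zone_idx(zone);	/* zones above stay untouched */
		shrink_node_memcg(zone->zone_pgdat, memcg, sc, lru_pages);
	}
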
> @@ -2982,7 +2990,7 @@ static void age_active_anon(struct pglist_data *pgdat,
>  
>       memcg = mem_cgroup_iter(NULL, NULL, NULL);
>       do {
> -             struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
> +             struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, zone, memcg);
>  
>               if (inactive_list_is_low(lruvec, false))
>                       shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
> diff --git a/mm/workingset.c b/mm/workingset.c
> index c0820e06aaff..2d81ca11317d 100644
> --- a/mm/workingset.c
> +++ b/mm/workingset.c
> @@ -218,7 +218,7 @@ void *workingset_eviction(struct address_space *mapping, struct page *page)
>       VM_BUG_ON_PAGE(page_count(page), page);
>       VM_BUG_ON_PAGE(!PageLocked(page), page);
>  
> -     lruvec = mem_cgroup_zone_lruvec(zone, memcg);
> +     lruvec = mem_cgroup_lruvec(zone->zone_pgdat, zone, memcg);
>       eviction = atomic_long_inc_return(&lruvec->inactive_age);
>       return pack_shadow(memcgid, zone, eviction);
>  }
> @@ -267,7 +267,7 @@ bool workingset_refault(void *shadow)
>               rcu_read_unlock();
>               return false;
>       }
> -     lruvec = mem_cgroup_zone_lruvec(zone, memcg);
> +     lruvec = mem_cgroup_lruvec(zone->zone_pgdat, zone, memcg);
>       refault = atomic_long_read(&lruvec->inactive_age);
>       active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE);
>       rcu_read_unlock();
> @@ -317,7 +317,7 @@ void workingset_activation(struct page *page)
>        */
>       if (!mem_cgroup_disabled() && !page_memcg(page))
>               goto out;
> -     lruvec = mem_cgroup_zone_lruvec(page_zone(page), page_memcg(page));
> +     lruvec = mem_cgroup_lruvec(page_pgdat(page), page_zone(page), page_memcg(page));
>       atomic_long_inc(&lruvec->inactive_age);
>  out:
>       unlock_page_memcg(page);
> -- 
> 2.6.4
> 

-- 
Michal Hocko
SUSE Labs
