Hi Aaron,

Thank you for the patch! Yet there is something to improve:

[auto build test ERROR on hnaz-linux-mm/master]

url:    https://github.com/0day-ci/linux/commits/Aaron-Tomlin/mm-page_alloc-try-oom-if-reclaim-is-unable-to-make-forward-progress/20210316-010203
base:   https://github.com/hnaz/linux-mm master
config: arc-randconfig-r024-20210315 (attached as .config)
compiler: arceb-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/77338aaff2606a7715c832545e79370e849e3b4e
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Aaron-Tomlin/mm-page_alloc-try-oom-if-reclaim-is-unable-to-make-forward-progress/20210316-010203
        git checkout 77338aaff2606a7715c832545e79370e849e3b4e
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arc 

If you fix the issue, kindly add the following tag as appropriate:
Reported-by: kernel test robot <l...@intel.com>

All errors/warnings (new ones prefixed by >>):

   mm/page_alloc.c: In function 'should_reclaim_retry':
>> mm/page_alloc.c:4444:3: error: 'result' undeclared (first use in this function)
    4444 |   result false;
         |   ^~~~~~
   mm/page_alloc.c:4444:3: note: each undeclared identifier is reported only once for each function it appears in
>> mm/page_alloc.c:4444:9: error: expected ';' before 'false'
    4444 |   result false;
         |         ^~~~~~
         |         ;
>> mm/page_alloc.c:4447:50: error: expected ';' before 'for'
    4447 |   return unreserve_highatomic_pageblock(ac, true)
         |                                                  ^
         |                                                  ;
   mm/page_alloc.c:4426:18: warning: unused variable 'z' [-Wunused-variable]
    4426 |  struct zoneref *z;
         |                  ^
   mm/page_alloc.c:4425:15: warning: unused variable 'zone' [-Wunused-variable]
    4425 |  struct zone *zone;
         |               ^~~~
   mm/page_alloc.c: In function '__alloc_pages_slowpath':
>> mm/page_alloc.c:4720:11: error: expected ';' before ':' token
    4720 |   goto oom:
         |           ^
         |           ;
>> mm/page_alloc.c:4556:6: warning: variable 'compaction_retries' set but not used [-Wunused-but-set-variable]
    4556 |  int compaction_retries;
         |      ^~~~~~~~~~~~~~~~~~
   mm/page_alloc.c: At top level:
   mm/page_alloc.c:6136:23: warning: no previous prototype for 'memmap_init' [-Wmissing-prototypes]
    6136 | void __meminit __weak memmap_init(unsigned long size, int nid,
         |                       ^~~~~~~~~~~
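
Going by the compiler's fix-it hints above, the two errors in should_reclaim_retry() look like simple typos. A plausible reading of the intended statements (a guess from the hints and the surrounding code, not confirmed by the author):

        if (*no_progress_loops > MAX_RECLAIM_RETRIES)
                return false;   /* was: "result false;" */
        /* Last chance before OOM, try draining highatomic_reserve once */
        else if (*no_progress_loops == MAX_RECLAIM_RETRIES)
                return unreserve_highatomic_pageblock(ac, true);   /* ';' was missing */

If so, the "unused variable" warnings for 'zone' and 'z' are most likely fallout from the broken parse (the for_each_zone_zonelist_nodemask() loop that uses them never compiles cleanly) rather than independent problems.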


vim +/result +4444 mm/page_alloc.c

  4409  
  4410  /*
  4411   * Checks whether it makes sense to retry the reclaim to make a forward progress
  4412   * for the given allocation request.
  4413   *
  4414   * We give up when we either have tried MAX_RECLAIM_RETRIES in a row
  4415   * without success, or when we couldn't even meet the watermark if we
  4416   * reclaimed all remaining pages on the LRU lists.
  4417   *
  4418   * Returns true if a retry is viable or false to enter the oom path.
  4419   */
  4420  static inline bool
  4421  should_reclaim_retry(gfp_t gfp_mask, unsigned order,
  4422                       struct alloc_context *ac, int alloc_flags,
  4423                       bool did_some_progress, int *no_progress_loops)
  4424  {
  4425          struct zone *zone;
  4426          struct zoneref *z;
  4427          bool ret = false;
  4428  
  4429          /*
  4430           * Costly allocations might have made a progress but this doesn't mean
  4431           * their order will become available due to high fragmentation so
  4432           * always increment the no progress counter for them
  4433           */
  4434          if (did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER)
  4435                  *no_progress_loops = 0;
  4436          else
  4437                  (*no_progress_loops)++;
  4438  
  4439          /*
  4440           * Make sure we converge to OOM if we cannot make any progress
  4441           * several times in the row.
  4442           */
  4443          if (*no_progress_loops > MAX_RECLAIM_RETRIES)
> 4444                  result false;
  4445          /* Last chance before OOM, try draining highatomic_reserve once */
  4446          else if (*no_progress_loops == MAX_RECLAIM_RETRIES)
> 4447                  return unreserve_highatomic_pageblock(ac, true)
  4448  
  4449          /*
  4450           * Keep reclaiming pages while there is a chance this will lead
  4451           * somewhere.  If none of the target zones can satisfy our allocation
  4452           * request even if all reclaimable pages are considered then we are
  4453           * screwed and have to go OOM.
  4454           */
  4455          for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
  4456                                  ac->highest_zoneidx, ac->nodemask) {
  4457                  unsigned long available;
  4458                  unsigned long reclaimable;
  4459                  unsigned long min_wmark = min_wmark_pages(zone);
  4460                  bool wmark;
  4461  
  4462                  available = reclaimable = zone_reclaimable_pages(zone);
  4463                  available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
  4464  
  4465                  /*
  4466                   * Would the allocation succeed if we reclaimed all
  4467                   * reclaimable pages?
  4468                   */
  4469                  wmark = __zone_watermark_ok(zone, order, min_wmark,
  4470                                  ac->highest_zoneidx, alloc_flags, available);
  4471                  trace_reclaim_retry_zone(z, order, reclaimable,
  4472                                  available, min_wmark, *no_progress_loops, wmark);
  4473                  if (wmark) {
  4474                          /*
  4475                           * If we didn't make any progress and have a lot of
  4476                           * dirty + writeback pages then we should wait for
  4477                           * an IO to complete to slow down the reclaim and
  4478                           * prevent from pre mature OOM
  4479                           */
  4480                          if (!did_some_progress) {
  4481                                  unsigned long write_pending;
  4482  
  4483                                  write_pending = zone_page_state_snapshot(zone,
  4484                                                          NR_ZONE_WRITE_PENDING);
  4485  
  4486                                  if (2 * write_pending > reclaimable) {
  4487                                          congestion_wait(BLK_RW_ASYNC, HZ/10);
  4488                                          return true;
  4489                                  }
  4490                          }
  4491  
  4492                          ret = true;
  4493                          goto out;
  4494                  }
  4495          }
  4496  
  4497  out:
  4498          /*
  4499           * Memory allocation/reclaim might be called from a WQ context and the
  4500           * current implementation of the WQ concurrency control doesn't
  4501           * recognize that a particular WQ is congested if the worker thread is
  4502           * looping without ever sleeping. Therefore we have to do a short sleep
  4503           * here rather than calling cond_resched().
  4504           */
  4505          if (current->flags & PF_WQ_WORKER)
  4506                  schedule_timeout_uninterruptible(1);
  4507          else
  4508                  cond_resched();
  4509          return ret;
  4510  }
  4511  
  4512  static inline bool
  4513  check_retry_cpuset(int cpuset_mems_cookie, struct alloc_context *ac)
  4514  {
  4515          /*
  4516           * It's possible that cpuset's mems_allowed and the nodemask from
  4517           * mempolicy don't intersect. This should be normally dealt with by
  4518           * policy_nodemask(), but it's possible to race with cpuset update in
  4519           * such a way the check therein was true, and then it became false
  4520           * before we got our cpuset_mems_cookie here.
  4521           * This assumes that for all allocations, ac->nodemask can come only
  4522           * from MPOL_BIND mempolicy (whose documented semantics is to be ignored
  4523           * when it does not intersect with the cpuset restrictions) or the
  4524           * caller can deal with a violated nodemask.
  4525           */
  4526          if (cpusets_enabled() && ac->nodemask &&
  4527                          !cpuset_nodemask_valid_mems_allowed(ac->nodemask)) {
  4528                  ac->nodemask = NULL;
  4529                  return true;
  4530          }
  4531  
  4532          /*
  4533           * When updating a task's mems_allowed or mempolicy nodemask, it is
  4534           * possible to race with parallel threads in such a way that our
  4535           * allocation can fail while the mask is being updated. If we are about
  4536           * to fail, check if the cpuset changed during allocation and if so,
  4537           * retry.
  4538           */
  4539          if (read_mems_allowed_retry(cpuset_mems_cookie))
  4540                  return true;
  4541  
  4542          return false;
  4543  }
  4544  
  4545  static inline struct page *
  4546  __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
  4547                                                  struct alloc_context *ac)
  4548  {
  4549          bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
  4550          const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
  4551          struct page *page = NULL;
  4552          unsigned int alloc_flags;
  4553          unsigned long did_some_progress;
  4554          enum compact_priority compact_priority;
  4555          enum compact_result compact_result;
> 4556          int compaction_retries;
  4557          int no_progress_loops;
  4558          unsigned int cpuset_mems_cookie;
  4559          int reserve_flags;
  4560  
  4561          /*
  4562           * We also sanity check to catch abuse of atomic reserves being used by
  4563           * callers that are not in atomic context.
  4564           */
  4565          if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
  4566                                  (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
  4567                  gfp_mask &= ~__GFP_ATOMIC;
  4568  
  4569  retry_cpuset:
  4570          compaction_retries = 0;
  4571          no_progress_loops = 0;
  4572          compact_priority = DEF_COMPACT_PRIORITY;
  4573          cpuset_mems_cookie = read_mems_allowed_begin();
  4574  
  4575          /*
  4576           * The fast path uses conservative alloc_flags to succeed only until
  4577           * kswapd needs to be woken up, and to avoid the cost of setting up
  4578           * alloc_flags precisely. So we do that now.
  4579           */
  4580          alloc_flags = gfp_to_alloc_flags(gfp_mask);
  4581  
  4582          /*
  4583           * We need to recalculate the starting point for the zonelist iterator
  4584           * because we might have used different nodemask in the fast path, or
  4585           * there was a cpuset modification and we are retrying - otherwise we
  4586           * could end up iterating over non-eligible zones endlessly.
  4587           */
  4588          ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
  4589                                          ac->highest_zoneidx, ac->nodemask);
  4590          if (!ac->preferred_zoneref->zone)
  4591                  goto nopage;
  4592  
  4593          if (alloc_flags & ALLOC_KSWAPD)
  4594                  wake_all_kswapds(order, gfp_mask, ac);
  4595  
  4596          /*
  4597           * The adjusted alloc_flags might result in immediate success, so try
  4598           * that first
  4599           */
  4600          page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
  4601          if (page)
  4602                  goto got_pg;
  4603  
  4604          /*
  4605           * For costly allocations, try direct compaction first, as it's likely
  4606           * that we have enough base pages and don't need to reclaim. For non-
  4607           * movable high-order allocations, do that as well, as compaction will
  4608           * try prevent permanent fragmentation by migrating from blocks of the
  4609           * same migratetype.
  4610           * Don't try this for allocations that are allowed to ignore
  4611           * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.
  4612           */
  4613          if (can_direct_reclaim &&
  4614                          (costly_order ||
  4615                             (order > 0 && ac->migratetype != MIGRATE_MOVABLE))
  4616                          && !gfp_pfmemalloc_allowed(gfp_mask)) {
  4617                  page = __alloc_pages_direct_compact(gfp_mask, order,
  4618                                                  alloc_flags, ac,
  4619                                                  INIT_COMPACT_PRIORITY,
  4620                                                  &compact_result);
  4621                  if (page)
  4622                          goto got_pg;
  4623  
  4624                  /*
  4625                   * Checks for costly allocations with __GFP_NORETRY, which
  4626                   * includes some THP page fault allocations
  4627                   */
  4628                  if (costly_order && (gfp_mask & __GFP_NORETRY)) {
  4629                          /*
  4630                           * If allocating entire pageblock(s) and compaction
  4631                           * failed because all zones are below low watermarks
  4632                           * or is prohibited because it recently failed at this
  4633                           * order, fail immediately unless the allocator has
  4634                           * requested compaction and reclaim retry.
  4635                           *
  4636                           * Reclaim is
  4637                           *  - potentially very expensive because zones are far
  4638                           *    below their low watermarks or this is part of very
  4639                           *    bursty high order allocations,
  4640                           *  - not guaranteed to help because isolate_freepages()
  4641                           *    may not iterate over freed pages as part of its
  4642                           *    linear scan, and
  4643                           *  - unlikely to make entire pageblocks free on its
  4644                           *    own.
  4645                           */
  4646                          if (compact_result == COMPACT_SKIPPED ||
  4647                              compact_result == COMPACT_DEFERRED)
  4648                                  goto nopage;
  4649  
  4650                          /*
  4651                           * Looks like reclaim/compaction is worth trying, but
  4652                           * sync compaction could be very expensive, so keep
  4653                           * using async compaction.
  4654                           */
  4655                          compact_priority = INIT_COMPACT_PRIORITY;
  4656                  }
  4657          }
  4658  
  4659  retry:
  4660          /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
  4661          if (alloc_flags & ALLOC_KSWAPD)
  4662                  wake_all_kswapds(order, gfp_mask, ac);
  4663  
  4664          reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
  4665          if (reserve_flags)
  4666                  alloc_flags = current_alloc_flags(gfp_mask, reserve_flags);
  4667  
  4668          /*
  4669           * Reset the nodemask and zonelist iterators if memory policies can be
  4670           * ignored. These allocations are high priority and system rather than
  4671           * user oriented.
  4672           */
  4673          if (!(alloc_flags & ALLOC_CPUSET) || reserve_flags) {
  4674                  ac->nodemask = NULL;
  4675                  ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
  4676                                          ac->highest_zoneidx, ac->nodemask);
  4677          }
  4678  
  4679          /* Attempt with potentially adjusted zonelist and alloc_flags */
  4680          page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
  4681          if (page)
  4682                  goto got_pg;
  4683  
  4684          /* Caller is not willing to reclaim, we can't balance anything */
  4685          if (!can_direct_reclaim)
  4686                  goto nopage;
  4687  
  4688          /* Avoid recursion of direct reclaim */
  4689          if (current->flags & PF_MEMALLOC)
  4690                  goto nopage;
  4691  
  4692          /* Try direct reclaim and then allocating */
  4693          page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
  4694                                                          &did_some_progress);
  4695          if (page)
  4696                  goto got_pg;
  4697  
  4698          /* Try direct compaction and then allocating */
  4699          page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, ac,
  4700                                          compact_priority, &compact_result);
  4701          if (page)
  4702                  goto got_pg;
  4703  
  4704          /* Do not loop if specifically requested */
  4705          if (gfp_mask & __GFP_NORETRY)
  4706                  goto nopage;
  4707  
  4708          /*
  4709           * Do not retry costly high order allocations unless they are
  4710           * __GFP_RETRY_MAYFAIL
  4711           */
  4712          if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
  4713                  goto nopage;
  4714  
  4715          if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
  4716                                   did_some_progress > 0, &no_progress_loops))
  4717                  goto retry;
  4718  
  4719          if (should_try_oom(no_progress_loops, compact_result))
  4720                  goto oom:
  4721          /*
  4722           * It doesn't make any sense to retry for the compaction if the order-0
  4723           * reclaim is not able to make any progress because the current
  4724           * implementation of the compaction depends on the sufficient amount
  4725           * of free memory (see __compaction_suitable)
  4726           */
  4727          if (did_some_progress > 0 &&
  4728                          should_compact_retry(ac, order, alloc_flags,
  4729                                  compact_result, &compact_priority,
  4730                                  &compaction_retries))
  4731                  goto retry;
  4732  
  4733  
  4734          /* Deal with possible cpuset update races before we start OOM killing */
  4735          if (check_retry_cpuset(cpuset_mems_cookie, ac))
  4736                  goto retry_cpuset;
  4737  
  4738  oom:
  4739          /* Reclaim has failed us, start killing things */
  4740          page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
  4741          if (page)
  4742                  goto got_pg;
  4743  
  4744          /* Avoid allocations with no watermarks from looping endlessly */
  4745          if (tsk_is_oom_victim(current) &&
  4746              (alloc_flags & ALLOC_OOM ||
  4747               (gfp_mask & __GFP_NOMEMALLOC)))
  4748                  goto nopage;
  4749  
  4750          /* Retry as long as the OOM killer is making progress */
  4751          if (did_some_progress) {
  4752                  no_progress_loops = 0;
  4753                  goto retry;
  4754          }
  4755  
  4756  nopage:
  4757          /* Deal with possible cpuset update races before we fail */
  4758          if (check_retry_cpuset(cpuset_mems_cookie, ac))
  4759                  goto retry_cpuset;
  4760  
  4761          /*
  4762           * Make sure that __GFP_NOFAIL request doesn't leak out and make sure
  4763           * we always retry
  4764           */
  4765          if (gfp_mask & __GFP_NOFAIL) {
  4766                  /*
  4767                   * All existing users of the __GFP_NOFAIL are blockable, so warn
  4768                   * of any new users that actually require GFP_NOWAIT
  4769                   */
  4770                  if (WARN_ON_ONCE(!can_direct_reclaim))
  4771                          goto fail;
  4772  
  4773                  /*
  4774                   * PF_MEMALLOC request from this context is rather bizarre
  4775                   * because we cannot reclaim anything and only can loop waiting
  4776                   * for somebody to do a work for us
  4777                   */
  4778                  WARN_ON_ONCE(current->flags & PF_MEMALLOC);
  4779  
  4780                  /*
  4781                   * non failing costly orders are a hard requirement which we
  4782                   * are not prepared for much so let's warn about these users
  4783                   * so that we can identify them and convert them to something
  4784                   * else.
  4785                   */
  4786                  WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
  4787  
  4788                  /*
  4789                   * Help non-failing allocations by giving them access to memory
  4790                   * reserves but do not use ALLOC_NO_WATERMARKS because this
  4791                   * could deplete whole memory reserves which would just make
  4792                   * the situation worse
  4793                   */
  4794                  page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac);
  4795                  if (page)
  4796                          goto got_pg;
  4797  
  4798                  cond_resched();
  4799                  goto retry;
  4800          }
  4801  fail:
  4802          warn_alloc(gfp_mask, ac->nodemask,
  4803                          "page allocation failure: order:%u", order);
  4804  got_pg:
  4805          return page;
  4806  }
  4807  
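
The remaining error, at mm/page_alloc.c:4720, looks like the same kind of typo: goto takes "label;" rather than "label:", and an oom: label already exists at line 4738. A sketch of the presumably intended statement (assuming should_try_oom() is the helper this patch introduces):

        if (should_try_oom(no_progress_loops, compact_result))
                goto oom;

With that parse error fixed, the "set but not used" warning for 'compaction_retries' would likely go away as well, since should_compact_retry() takes its address a few lines further down.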

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org

Attachment: .config.gz
