Hi Thomas,

kernel test robot noticed the following build errors:
[auto build test ERROR on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/mm-Fix-a-hmm_range_fault-livelock-starvation-problem/20260203-184803
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20260203104532.98534-1-thomas.hellstrom%40linux.intel.com
patch subject: [PATCH v2] mm: Fix a hmm_range_fault() livelock / starvation problem
config: x86_64-allnoconfig (https://download.01.org/0day-ci/archive/20260203/[email protected]/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260203/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

>> mm/memory.c:4769:5: error: call to undeclared function 'migration_entry_wait_on_locked'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
    4769 | 				migration_entry_wait_on_locked(entry, vmf->ptl);
         | 				^
   mm/memory.c:4769:5: note: did you mean 'migration_entry_wait_huge'?
   include/linux/swapops.h:237:20: note: 'migration_entry_wait_huge' declared here
     237 | static inline void migration_entry_wait_huge(struct vm_area_struct *vma,
         |                    ^
   1 error generated.


vim +/migration_entry_wait_on_locked +4769 mm/memory.c

  4699	
  4700	/*
  4701	 * We enter with non-exclusive mmap_lock (to exclude vma changes,
  4702	 * but allow concurrent faults), and pte mapped but not yet locked.
  4703	 * We return with pte unmapped and unlocked.
  4704	 *
  4705	 * We return with the mmap_lock locked or unlocked in the same cases
  4706	 * as does filemap_fault().
  4707	 */
  4708	vm_fault_t do_swap_page(struct vm_fault *vmf)
  4709	{
  4710		struct vm_area_struct *vma = vmf->vma;
  4711		struct folio *swapcache = NULL, *folio;
  4712		struct page *page;
  4713		struct swap_info_struct *si = NULL;
  4714		rmap_t rmap_flags = RMAP_NONE;
  4715		bool exclusive = false;
  4716		softleaf_t entry;
  4717		pte_t pte;
  4718		vm_fault_t ret = 0;
  4719		int nr_pages;
  4720		unsigned long page_idx;
  4721		unsigned long address;
  4722		pte_t *ptep;
  4723	
  4724		if (!pte_unmap_same(vmf))
  4725			goto out;
  4726	
  4727		entry = softleaf_from_pte(vmf->orig_pte);
  4728		if (unlikely(!softleaf_is_swap(entry))) {
  4729			if (softleaf_is_migration(entry)) {
  4730				migration_entry_wait(vma->vm_mm, vmf->pmd,
  4731						     vmf->address);
  4732			} else if (softleaf_is_device_exclusive(entry)) {
  4733				vmf->page = softleaf_to_page(entry);
  4734				ret = remove_device_exclusive_entry(vmf);
  4735			} else if (softleaf_is_device_private(entry)) {
  4736				if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
  4737					/*
  4738					 * migrate_to_ram is not yet ready to operate
  4739					 * under VMA lock.
  4740					 */
  4741					vma_end_read(vma);
  4742					ret = VM_FAULT_RETRY;
  4743					goto out;
  4744				}
  4745	
  4746				vmf->page = softleaf_to_page(entry);
  4747				vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
  4748							       vmf->address, &vmf->ptl);
  4749				if (unlikely(!vmf->pte ||
  4750					     !pte_same(ptep_get(vmf->pte),
  4751						       vmf->orig_pte)))
  4752					goto unlock;
  4753	
  4754				/*
  4755				 * Get a page reference while we know the page can't be
  4756				 * freed.
  4757				 */
  4758				if (trylock_page(vmf->page)) {
  4759					struct dev_pagemap *pgmap;
  4760	
  4761					get_page(vmf->page);
  4762					pte_unmap_unlock(vmf->pte, vmf->ptl);
  4763					pgmap = page_pgmap(vmf->page);
  4764					ret = pgmap->ops->migrate_to_ram(vmf);
  4765					unlock_page(vmf->page);
  4766					put_page(vmf->page);
  4767				} else {
  4768					pte_unmap(vmf->pte);
> 4769					migration_entry_wait_on_locked(entry, vmf->ptl);
  4770				}
  4771			} else if (softleaf_is_hwpoison(entry)) {
  4772				ret = VM_FAULT_HWPOISON;
  4773			} else if (softleaf_is_marker(entry)) {
  4774				ret = handle_pte_marker(vmf);
  4775			} else {
  4776				print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL);
  4777				ret = VM_FAULT_SIGBUS;
  4778			}
  4779			goto out;
  4780		}
  4781	
  4782		/* Prevent swapoff from happening to us. */
  4783		si = get_swap_device(entry);
  4784		if (unlikely(!si))
  4785			goto out;
  4786	
  4787		folio = swap_cache_get_folio(entry);
  4788		if (folio)
  4789			swap_update_readahead(folio, vma, vmf->address);
  4790		if (!folio) {
  4791			if (data_race(si->flags & SWP_SYNCHRONOUS_IO)) {
  4792				folio = alloc_swap_folio(vmf);
  4793				if (folio) {
  4794					/*
  4795					 * folio is charged, so swapin can only fail due
  4796					 * to raced swapin and return NULL.
  4797					 */
  4798					swapcache = swapin_folio(entry, folio);
  4799					if (swapcache != folio)
  4800						folio_put(folio);
  4801					folio = swapcache;
  4802				}
  4803			} else {
  4804				folio = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vmf);
  4805			}
  4806	
  4807			if (!folio) {
  4808				/*
  4809				 * Back out if somebody else faulted in this pte
  4810				 * while we released the pte lock.
  4811				 */
  4812				vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
  4813							       vmf->address, &vmf->ptl);
  4814				if (likely(vmf->pte &&
  4815					   pte_same(ptep_get(vmf->pte), vmf->orig_pte)))
  4816					ret = VM_FAULT_OOM;
  4817				goto unlock;
  4818			}
  4819	
  4820			/* Had to read the page from swap area: Major fault */
  4821			ret = VM_FAULT_MAJOR;
  4822			count_vm_event(PGMAJFAULT);
  4823			count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
  4824		}
  4825	
  4826		swapcache = folio;
  4827		ret |= folio_lock_or_retry(folio, vmf);
  4828		if (ret & VM_FAULT_RETRY)
  4829			goto out_release;
  4830	
  4831		page = folio_file_page(folio, swp_offset(entry));
  4832		/*
  4833		 * Make sure folio_free_swap() or swapoff did not release the
  4834		 * swapcache from under us. The page pin, and pte_same test
  4835		 * below, are not enough to exclude that. Even if it is still
  4836		 * swapcache, we need to check that the page's swap has not
  4837		 * changed.
  4838		 */
  4839		if (unlikely(!folio_matches_swap_entry(folio, entry)))
  4840			goto out_page;
  4841	
  4842		if (unlikely(PageHWPoison(page))) {
  4843			/*
  4844			 * hwpoisoned dirty swapcache pages are kept for killing
  4845			 * owner processes (which may be unknown at hwpoison time)
  4846			 */
  4847			ret = VM_FAULT_HWPOISON;
  4848			goto out_page;
  4849		}
  4850	
  4851		/*
  4852		 * KSM sometimes has to copy on read faults, for example, if
  4853		 * folio->index of non-ksm folios would be nonlinear inside the
  4854		 * anon VMA -- the ksm flag is lost on actual swapout.
  4855		 */
  4856		folio = ksm_might_need_to_copy(folio, vma, vmf->address);
  4857		if (unlikely(!folio)) {
  4858			ret = VM_FAULT_OOM;
  4859			folio = swapcache;
  4860			goto out_page;
  4861		} else if (unlikely(folio == ERR_PTR(-EHWPOISON))) {
  4862			ret = VM_FAULT_HWPOISON;
  4863			folio = swapcache;
  4864			goto out_page;
  4865		} else if (folio != swapcache)
  4866			page = folio_page(folio, 0);
  4867	
  4868		/*
  4869		 * If we want to map a page that's in the swapcache writable, we
  4870		 * have to detect via the refcount if we're really the exclusive
  4871		 * owner. Try removing the extra reference from the local LRU
  4872		 * caches if required.
  4873		 */
  4874		if ((vmf->flags & FAULT_FLAG_WRITE) &&
  4875		    !folio_test_ksm(folio) && !folio_test_lru(folio))
  4876			lru_add_drain();
  4877	
  4878		folio_throttle_swaprate(folio, GFP_KERNEL);
  4879	
  4880		/*
  4881		 * Back out if somebody else already faulted in this pte.
  4882		 */
  4883		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
  4884					       &vmf->ptl);
  4885		if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte)))
  4886			goto out_nomap;
  4887	
  4888		if (unlikely(!folio_test_uptodate(folio))) {
  4889			ret = VM_FAULT_SIGBUS;
  4890			goto out_nomap;
  4891		}
  4892	
  4893		nr_pages = 1;
  4894		page_idx = 0;
  4895		address = vmf->address;
  4896		ptep = vmf->pte;
  4897		if (folio_test_large(folio) && folio_test_swapcache(folio)) {
  4898			int nr = folio_nr_pages(folio);
  4899			unsigned long idx = folio_page_idx(folio, page);
  4900			unsigned long folio_start = address - idx * PAGE_SIZE;
  4901			unsigned long folio_end = folio_start + nr * PAGE_SIZE;
  4902			pte_t *folio_ptep;
  4903			pte_t folio_pte;
  4904	
  4905			if (unlikely(folio_start < max(address & PMD_MASK, vma->vm_start)))
  4906				goto check_folio;
  4907			if (unlikely(folio_end > pmd_addr_end(address, vma->vm_end)))
  4908				goto check_folio;
  4909	
  4910			folio_ptep = vmf->pte - idx;
  4911			folio_pte = ptep_get(folio_ptep);
  4912			if (!pte_same(folio_pte, pte_move_swp_offset(vmf->orig_pte, -idx)) ||
  4913			    swap_pte_batch(folio_ptep, nr, folio_pte) != nr)
  4914				goto check_folio;
  4915	
  4916			page_idx = idx;
  4917			address = folio_start;
  4918			ptep = folio_ptep;
  4919			nr_pages = nr;
  4920			entry = folio->swap;
  4921			page = &folio->page;
  4922		}
  4923	
  4924	check_folio:
  4925		/*
  4926		 * PG_anon_exclusive reuses PG_mappedtodisk for anon pages. A swap pte
  4927		 * must never point at an anonymous page in the swapcache that is
  4928		 * PG_anon_exclusive. Sanity check that this holds and especially, that
  4929		 * no filesystem set PG_mappedtodisk on a page in the swapcache. Sanity
  4930		 * check after taking the PT lock and making sure that nobody
  4931		 * concurrently faulted in this page and set PG_anon_exclusive.
  4932		 */
  4933		BUG_ON(!folio_test_anon(folio) && folio_test_mappedtodisk(folio));
  4934		BUG_ON(folio_test_anon(folio) && PageAnonExclusive(page));
  4935	
  4936		/*
  4937		 * If a large folio already belongs to anon mapping, then we
  4938		 * can just go on and map it partially.
  4939		 * If not, with the large swapin check above failing, the page table
  4940		 * have changed, so sub pages might got charged to the wrong cgroup,
  4941		 * or even should be shmem. So we have to free it and fallback.
  4942		 * Nothing should have touched it, both anon and shmem checks if a
  4943		 * large folio is fully appliable before use.
  4944		 *
  4945		 * This will be removed once we unify folio allocation in the swap cache
  4946		 * layer, where allocation of a folio stabilizes the swap entries.
  4947		 */
  4948		if (!folio_test_anon(folio) && folio_test_large(folio) &&
  4949		    nr_pages != folio_nr_pages(folio)) {
  4950			if (!WARN_ON_ONCE(folio_test_dirty(folio)))
  4951				swap_cache_del_folio(folio);
  4952			goto out_nomap;
  4953		}
  4954	
  4955		/*
  4956		 * Check under PT lock (to protect against concurrent fork() sharing
  4957		 * the swap entry concurrently) for certainly exclusive pages.
  4958		 */
  4959		if (!folio_test_ksm(folio)) {
  4960			/*
  4961			 * The can_swapin_thp check above ensures all PTE have
  4962			 * same exclusiveness. Checking just one PTE is fine.
  4963			 */
  4964			exclusive = pte_swp_exclusive(vmf->orig_pte);
  4965			if (exclusive)
  4966				check_swap_exclusive(folio, entry, nr_pages);
  4967			if (folio != swapcache) {
  4968				/*
  4969				 * We have a fresh page that is not exposed to the
  4970				 * swapcache -> certainly exclusive.
  4971				 */
  4972				exclusive = true;
  4973			} else if (exclusive && folio_test_writeback(folio) &&
  4974				   data_race(si->flags & SWP_STABLE_WRITES)) {
  4975				/*
  4976				 * This is tricky: not all swap backends support
  4977				 * concurrent page modifications while under writeback.
  4978				 *
  4979				 * So if we stumble over such a page in the swapcache
  4980				 * we must not set the page exclusive, otherwise we can
  4981				 * map it writable without further checks and modify it
  4982				 * while still under writeback.
  4983				 *
  4984				 * For these problematic swap backends, simply drop the
  4985				 * exclusive marker: this is perfectly fine as we start
  4986				 * writeback only if we fully unmapped the page and
  4987				 * there are no unexpected references on the page after
  4988				 * unmapping succeeded. After fully unmapped, no
  4989				 * further GUP references (FOLL_GET and FOLL_PIN) can
  4990				 * appear, so dropping the exclusive marker and mapping
  4991				 * it only R/O is fine.
  4992				 */
  4993				exclusive = false;
  4994			}
  4995		}
  4996	
  4997		/*
  4998		 * Some architectures may have to restore extra metadata to the page
  4999		 * when reading from swap. This metadata may be indexed by swap entry
  5000		 * so this must be called before folio_put_swap().
  5001		 */
  5002		arch_swap_restore(folio_swap(entry, folio), folio);
  5003	
  5004		add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr_pages);
  5005		add_mm_counter(vma->vm_mm, MM_SWAPENTS, -nr_pages);
  5006		pte = mk_pte(page, vma->vm_page_prot);
  5007		if (pte_swp_soft_dirty(vmf->orig_pte))
  5008			pte = pte_mksoft_dirty(pte);
  5009		if (pte_swp_uffd_wp(vmf->orig_pte))
  5010			pte = pte_mkuffd_wp(pte);
  5011	
  5012		/*
  5013		 * Same logic as in do_wp_page(); however, optimize for pages that are
  5014		 * certainly not shared either because we just allocated them without
  5015		 * exposing them to the swapcache or because the swap entry indicates
  5016		 * exclusivity.
  5017		 */
  5018		if (!folio_test_ksm(folio) &&
  5019		    (exclusive || folio_ref_count(folio) == 1)) {
  5020			if ((vma->vm_flags & VM_WRITE) && !userfaultfd_pte_wp(vma, pte) &&
  5021			    !pte_needs_soft_dirty_wp(vma, pte)) {
  5022				pte = pte_mkwrite(pte, vma);
  5023				if (vmf->flags & FAULT_FLAG_WRITE) {
  5024					pte = pte_mkdirty(pte);
  5025					vmf->flags &= ~FAULT_FLAG_WRITE;
  5026				}
  5027			}
  5028			rmap_flags |= RMAP_EXCLUSIVE;
  5029		}
  5030		folio_ref_add(folio, nr_pages - 1);
  5031		flush_icache_pages(vma, page, nr_pages);
  5032		vmf->orig_pte = pte_advance_pfn(pte, page_idx);
  5033	
  5034		/* ksm created a completely new copy */
  5035		if (unlikely(folio != swapcache)) {
  5036			folio_add_new_anon_rmap(folio, vma, address, RMAP_EXCLUSIVE);
  5037			folio_add_lru_vma(folio, vma);
  5038			folio_put_swap(swapcache, NULL);
  5039		} else if (!folio_test_anon(folio)) {
  5040			/*
  5041			 * We currently only expect !anon folios that are fully
  5042			 * mappable. See the comment after can_swapin_thp above.
  5043			 */
  5044			VM_WARN_ON_ONCE_FOLIO(folio_nr_pages(folio) != nr_pages, folio);
  5045			VM_WARN_ON_ONCE_FOLIO(folio_mapped(folio), folio);
  5046			folio_add_new_anon_rmap(folio, vma, address, rmap_flags);
  5047			folio_put_swap(folio, NULL);
  5048		} else {
  5049			VM_WARN_ON_ONCE(nr_pages != 1 && nr_pages != folio_nr_pages(folio));
  5050			folio_add_anon_rmap_ptes(folio, page, nr_pages, vma, address,
  5051						 rmap_flags);
  5052			folio_put_swap(folio, nr_pages == 1 ? page : NULL);
  5053		}
  5054	
  5055		VM_BUG_ON(!folio_test_anon(folio) ||
  5056			  (pte_write(pte) && !PageAnonExclusive(page)));
  5057		set_ptes(vma->vm_mm, address, ptep, pte, nr_pages);
  5058		arch_do_swap_page_nr(vma->vm_mm, vma, address,
  5059				     pte, pte, nr_pages);
  5060	
  5061		/*
  5062		 * Remove the swap entry and conditionally try to free up the swapcache.
  5063		 * Do it after mapping, so raced page faults will likely see the folio
  5064		 * in swap cache and wait on the folio lock.
  5065		 */
  5066		if (should_try_to_free_swap(si, folio, vma, nr_pages, vmf->flags))
  5067			folio_free_swap(folio);
  5068	
  5069		folio_unlock(folio);
  5070		if (unlikely(folio != swapcache)) {
  5071			/*
  5072		 	 * Hold the lock to avoid the swap entry to be reused
  5073		 	 * until we take the PT lock for the pte_same() check
  5074		 	 * (to avoid false positives from pte_same). For
  5075		 	 * further safety release the lock after the folio_put_swap
  5076		 	 * so that the swap count won't change under a
  5077		 	 * parallel locked swapcache.
  5078		 	 */
  5079			folio_unlock(swapcache);
  5080			folio_put(swapcache);
  5081		}
  5082	
  5083		if (vmf->flags & FAULT_FLAG_WRITE) {
  5084			ret |= do_wp_page(vmf);
  5085			if (ret & VM_FAULT_ERROR)
  5086				ret &= VM_FAULT_ERROR;
  5087			goto out;
  5088		}
  5089	
  5090		/* No need to invalidate - it was non-present before */
  5091		update_mmu_cache_range(vmf, vma, address, ptep, nr_pages);
  5092	unlock:
  5093		if (vmf->pte)
  5094			pte_unmap_unlock(vmf->pte, vmf->ptl);
  5095	out:
  5096		if (si)
  5097			put_swap_device(si);
  5098		return ret;
  5099	out_nomap:
  5100		if (vmf->pte)
  5101			pte_unmap_unlock(vmf->pte, vmf->ptl);
  5102	out_page:
  5103		if (folio_test_swapcache(folio))
  5104			folio_free_swap(folio);
  5105		folio_unlock(folio);
  5106	out_release:
  5107		folio_put(folio);
  5108		if (folio != swapcache) {
  5109			folio_unlock(swapcache);
  5110			folio_put(swapcache);
  5111		}
  5112		if (si)
  5113			put_swap_device(si);
  5114		return ret;
  5115	}
  5116	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
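
For context, x86_64-allnoconfig has CONFIG_MIGRATION disabled, and migration_entry_wait_on_locked() appears to be declared in include/linux/migrate.h only when CONFIG_MIGRATION is enabled, which would explain the implicit-declaration error at the new call site in do_swap_page(). A minimal sketch of one way such configs could keep building is a fallback declaration for the !CONFIG_MIGRATION case; this is an illustration only, not the submitted fix, and the swp_entry_t/spinlock_t parameter types are assumed from the mainline declaration (the call site in the patched tree passes a softleaf_t, so the actual fix may look different):

/*
 * Hypothetical sketch for include/linux/migrate.h, assuming the declaration
 * currently sits behind CONFIG_MIGRATION with no fallback stub.
 */
#ifdef CONFIG_MIGRATION
void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl);
#else
static inline void migration_entry_wait_on_locked(swp_entry_t entry,
						  spinlock_t *ptl)
{
	/*
	 * Without CONFIG_MIGRATION this path is not expected to be reachable;
	 * drop the page table lock, as the real helper does, and return.
	 */
	spin_unlock(ptl);
}
#endif

An alternative would be to guard the new call site itself; either way the error is presumably limited to configs built without migration support.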
