On Thu, Nov 05, 2015 at 11:24:59AM +0200, Kirill A. Shutemov wrote:
> On Thu, Nov 05, 2015 at 12:10:13PM +0300, Vladimir Davydov wrote:
> > On Tue, Nov 03, 2015 at 05:26:15PM +0200, Kirill A. Shutemov wrote:
> > ...
> > > @@ -56,23 +56,69 @@ static int page_idle_clear_pte_refs_one(struct page *page,
> > >  {
> > >   struct mm_struct *mm = vma->vm_mm;
> > >   spinlock_t *ptl;
> > > + pgd_t *pgd;
> > > + pud_t *pud;
> > >   pmd_t *pmd;
> > >   pte_t *pte;
> > >   bool referenced = false;
> > >  
> > > - if (unlikely(PageTransHuge(page))) {
> > > -         pmd = page_check_address_pmd(page, mm, addr, &ptl);
> > > -         if (pmd) {
> > > -                 referenced = pmdp_clear_young_notify(vma, addr, pmd);
> > > + pgd = pgd_offset(mm, addr);
> > > + if (!pgd_present(*pgd))
> > > +         return SWAP_AGAIN;
> > > + pud = pud_offset(pgd, addr);
> > > + if (!pud_present(*pud))
> > > +         return SWAP_AGAIN;
> > > + pmd = pmd_offset(pud, addr);
> > > +
> > > + if (pmd_trans_huge(*pmd)) {
> > > +         ptl = pmd_lock(mm, pmd);
> > > +                if (!pmd_present(*pmd))
> > > +                 goto unlock_pmd;
> > > +         if (unlikely(!pmd_trans_huge(*pmd))) {
> > >                   spin_unlock(ptl);
> > > +                 goto map_pte;
> > >           }
> > > +
> > > +         if (pmd_page(*pmd) != page)
> > > +                 goto unlock_pmd;
> > > +
> > > +         referenced = pmdp_clear_young_notify(vma, addr, pmd);
> > > +         spin_unlock(ptl);
> > > +         goto found;
> > > +unlock_pmd:
> > > +         spin_unlock(ptl);
> > > +         return SWAP_AGAIN;
> > >   } else {
> > > -         pte = page_check_address(page, mm, addr, &ptl, 0);
> > > -         if (pte) {
> > > -                 referenced = ptep_clear_young_notify(vma, addr, pte);
> > > -                 pte_unmap_unlock(pte, ptl);
> > > -         }
> > > +         pmd_t pmde = *pmd;
> > > +         barrier();
> > > +         if (!pmd_present(pmde) || pmd_trans_huge(pmde))
> > > +                 return SWAP_AGAIN;
> > > +
> > > + }
> > > +map_pte:
> > > + pte = pte_offset_map(pmd, addr);
> > > + if (!pte_present(*pte)) {
> > > +         pte_unmap(pte);
> > > +         return SWAP_AGAIN;
> > >   }
> > > +
> > > + ptl = pte_lockptr(mm, pmd);
> > > + spin_lock(ptl);
> > > +
> > > + if (!pte_present(*pte)) {
> > > +         pte_unmap_unlock(pte, ptl);
> > > +         return SWAP_AGAIN;
> > > + }
> > > +
> > > + /* THP can be referenced by any subpage */
> > > + if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
> > > +         pte_unmap_unlock(pte, ptl);
> > > +         return SWAP_AGAIN;
> > > + }
> > > +
> > > + referenced = ptep_clear_young_notify(vma, addr, pte);
> > > + pte_unmap_unlock(pte, ptl);
> > > +found:
> > 
> > Can't we hide this stuff in a helper function, which would be used by
> > both page_referenced_one and page_idle_clear_pte_refs_one, instead of
> > duplicating page_referenced_one code here?
> 
> I would like to, but there's no obvious way to do that: PMDs and PTEs
> require different handling.
> 
> Any ideas?

Something like this? [COMPLETELY UNTESTED]
---
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 853f4f3c6742..bb9169d07c2b 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -216,6 +216,10 @@ static inline pte_t *page_check_address(struct page *page, struct mm_struct *mm,
        return ptep;
 }
 
+pte_t *page_check_address_transhuge(struct page *page, struct mm_struct *mm,
+                                   unsigned long address,
+                                   pmd_t **pmdp, spinlock_t **ptlp);
+
 /*
  * Used by swapoff to help locate where page is expected in vma.
  */
diff --git a/mm/page_idle.c b/mm/page_idle.c
index 2c9ebe12b40d..6574ef6a1a96 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -56,69 +56,21 @@ static int page_idle_clear_pte_refs_one(struct page *page,
 {
        struct mm_struct *mm = vma->vm_mm;
        spinlock_t *ptl;
-       pgd_t *pgd;
-       pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        bool referenced = false;
 
-       pgd = pgd_offset(mm, addr);
-       if (!pgd_present(*pgd))
+       pte = page_check_address_transhuge(page, mm, address, &pmd, &ptl);
+       if (!pte)
                return SWAP_AGAIN;
-       pud = pud_offset(pgd, addr);
-       if (!pud_present(*pud))
-               return SWAP_AGAIN;
-       pmd = pmd_offset(pud, addr);
-
-       if (pmd_trans_huge(*pmd)) {
-               ptl = pmd_lock(mm, pmd);
-                if (!pmd_present(*pmd))
-                       goto unlock_pmd;
-               if (unlikely(!pmd_trans_huge(*pmd))) {
-                       spin_unlock(ptl);
-                       goto map_pte;
-               }
 
-               if (pmd_page(*pmd) != page)
-                       goto unlock_pmd;
-
-               referenced = pmdp_clear_young_notify(vma, addr, pmd);
-               spin_unlock(ptl);
-               goto found;
-unlock_pmd:
-               spin_unlock(ptl);
-               return SWAP_AGAIN;
-       } else {
-               pmd_t pmde = *pmd;
-               barrier();
-               if (!pmd_present(pmde) || pmd_trans_huge(pmde))
-                       return SWAP_AGAIN;
-
-       }
-map_pte:
-       pte = pte_offset_map(pmd, addr);
-       if (!pte_present(*pte)) {
-               pte_unmap(pte);
-               return SWAP_AGAIN;
-       }
+       if (pte == pmd) /* trans huge */
+               referenced = pmdp_clear_young_notify(vma, address, pmd);
+       else
+               referenced = ptep_clear_young_notify(vma, addr, pte);
 
-       ptl = pte_lockptr(mm, pmd);
-       spin_lock(ptl);
-
-       if (!pte_present(*pte)) {
-               pte_unmap_unlock(pte, ptl);
-               return SWAP_AGAIN;
-       }
-
-       /* THP can be referenced by any subpage */
-       if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
-               pte_unmap_unlock(pte, ptl);
-               return SWAP_AGAIN;
-       }
-
-       referenced = ptep_clear_young_notify(vma, addr, pte);
        pte_unmap_unlock(pte, ptl);
-found:
+
        if (referenced) {
                clear_page_idle(page);
                /*
diff --git a/mm/rmap.c b/mm/rmap.c
index 1f90bda685b6..3638190cf7bc 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -796,48 +796,35 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
        return 1;
 }
 
-struct page_referenced_arg {
-       int mapcount;
-       int referenced;
-       unsigned long vm_flags;
-       struct mem_cgroup *memcg;
-};
-/*
- * arg: page_referenced_arg will be passed
- */
-static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
-                       unsigned long address, void *arg)
+pte_t *page_check_address_transhuge(struct page *page, struct mm_struct *mm,
+                                   unsigned long address,
+                                   pmd_t **pmdp, spinlock_t **ptlp)
 {
-       struct mm_struct *mm = vma->vm_mm;
-       spinlock_t *ptl;
-       int referenced = 0;
-       struct page_referenced_arg *pra = arg;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
+       spinlock_t *ptl;
 
        if (unlikely(PageHuge(page))) {
                /* when pud is not present, pte will be NULL */
                pte = huge_pte_offset(mm, address);
                if (!pte)
-                       return SWAP_AGAIN;
+                       return NULL;
 
                ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
+               pmd = NULL;
                goto check_pte;
        }
 
        pgd = pgd_offset(mm, address);
        if (!pgd_present(*pgd))
-               return SWAP_AGAIN;
-       pud = pud_offset(pgd, address);
+               return NULL;
        if (!pud_present(*pud))
-               return SWAP_AGAIN;
+               return NULL;
        pmd = pmd_offset(pud, address);
 
        if (pmd_trans_huge(*pmd)) {
-               int ret = SWAP_AGAIN;
-
                ptl = pmd_lock(mm, pmd);
                if (!pmd_present(*pmd))
                        goto unlock_pmd;
@@ -849,30 +836,23 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                if (pmd_page(*pmd) != page)
                        goto unlock_pmd;
 
-               if (vma->vm_flags & VM_LOCKED) {
-                       pra->vm_flags |= VM_LOCKED;
-                       ret = SWAP_FAIL; /* To break the loop */
-                       goto unlock_pmd;
-               }
-
-               if (pmdp_clear_flush_young_notify(vma, address, pmd))
-                       referenced++;
-               spin_unlock(ptl);
+               pte = (pte_t *)pmd;
                goto found;
 unlock_pmd:
                spin_unlock(ptl);
-               return ret;
+               return NULL;
        } else {
                pmd_t pmde = *pmd;
                barrier();
                if (!pmd_present(pmde) || pmd_trans_huge(pmde))
-                       return SWAP_AGAIN;
+                       return NULL;
        }
+
 map_pte:
        pte = pte_offset_map(pmd, address);
        if (!pte_present(*pte)) {
                pte_unmap(pte);
-               return SWAP_AGAIN;
+               return NULL;
        }
 
        ptl = pte_lockptr(mm, pmd);
@@ -881,35 +861,66 @@ check_pte:
 
        if (!pte_present(*pte)) {
                pte_unmap_unlock(pte, ptl);
-               return SWAP_AGAIN;
+               return NULL;
        }
 
        /* THP can be referenced by any subpage */
        if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
                pte_unmap_unlock(pte, ptl);
-               return SWAP_AGAIN;
+               return NULL;
        }
+found:
+       *ptlp = ptl;
+       *pmdp = pmd;
+       return pte;
+}
+
+struct page_referenced_arg {
+       int mapcount;
+       int referenced;
+       unsigned long vm_flags;
+       struct mem_cgroup *memcg;
+};
+/*
+ * arg: page_referenced_arg will be passed
+ */
+static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
+                       unsigned long address, void *arg)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       int referenced = 0;
+       struct page_referenced_arg *pra = arg;
+       pmd_t *pmd;
+       pte_t *pte;
+       spinlock_t *ptl;
+
+       pte = page_check_address_transhuge(page, mm, address, &pmd, &ptl);
+       if (!pte)
+               return SWAP_AGAIN;
 
        if (vma->vm_flags & VM_LOCKED) {
                pte_unmap_unlock(pte, ptl);
-               pra->vm_flags |= VM_LOCKED;
                return SWAP_FAIL; /* To break the loop */
        }
 
-       if (ptep_clear_flush_young_notify(vma, address, pte)) {
-               /*
-                * Don't treat a reference through a sequentially read
-                * mapping as such.  If the page has been used in
-                * another mapping, we will catch it; if this other
-                * mapping is already gone, the unmap path will have
-                * set PG_referenced or activated the page.
-                */
-               if (likely(!(vma->vm_flags & VM_SEQ_READ)))
+       if (pte == pmd) { /* trans huge */
+               if (pmdp_clear_flush_young_notify(vma, address, pmd))
                        referenced++;
+       } else {
+               if (ptep_clear_flush_young_notify(vma, address, pte)) {
+                       /*
+                        * Don't treat a reference through a sequentially read
+                        * mapping as such.  If the page has been used in
+                        * another mapping, we will catch it; if this other
+                        * mapping is already gone, the unmap path will have
+                        * set PG_referenced or activated the page.
+                        */
+                       if (likely(!(vma->vm_flags & VM_SEQ_READ)))
+                               referenced++;
+               }
        }
        pte_unmap_unlock(pte, ptl);
 
-found:
        if (referenced)
                clear_page_idle(page);
        if (test_and_clear_page_young(page))
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to