On Mon, Feb 27, 2023 at 07:45:08PM +0000, Christophe Leroy wrote:
> Hi,
> 
> Le 27/02/2023 à 18:57, Matthew Wilcox (Oracle) a écrit :
> > Add set_ptes(), update_mmu_cache_range() and flush_dcache_folio().
> > Change the PG_arch_1 (aka PG_dcache_dirty) flag from being per-page to
> > per-folio.
> > 
> > I'm unsure about my merging of flush_dcache_icache_hugepage() and
> > flush_dcache_icache_page() into flush_dcache_icache_folio() and subsequent
> > removal of flush_dcache_icache_phys().  Please review.
> 
> Not sure why you want to remove flush_dcache_icache_phys().

Well, I didn't, necessarily.  It's just that when I merged
flush_dcache_icache_hugepage() and flush_dcache_icache_page()
together, it was left with no callers.

> Although that's only feasible when the address bus is not wider than 32
> bits, and cannot be done on BOOKE as you can't switch off the MMU on BOOKE,
> flush_dcache_icache_phys() allows flushing unmapped pages without
> having to map them. So it is more efficient.

And it was just never done for the hugepage case?

> > @@ -148,17 +103,20 @@ static void __flush_dcache_icache(void *p)
> >     invalidate_icache_range(addr, addr + PAGE_SIZE);
> >   }
> >   
> > -static void flush_dcache_icache_hugepage(struct page *page)
> > +void flush_dcache_icache_folio(struct folio *folio)
> >   {
> > -   int i;
> > -   int nr = compound_nr(page);
> > +   unsigned int i, nr = folio_nr_pages(folio);
> >   
> > -   if (!PageHighMem(page)) {
> > +   if (flush_coherent_icache())
> > +           return;
> > +
> > +   if (!folio_test_highmem(folio)) {
> > +           void *addr = folio_address(folio);
> >             for (i = 0; i < nr; i++)
> > -                   __flush_dcache_icache(lowmem_page_address(page + i));
> > +                   __flush_dcache_icache(addr + i * PAGE_SIZE);
> >     } else {
> >             for (i = 0; i < nr; i++) {
> > -                   void *start = kmap_local_page(page + i);
> > +                   void *start = kmap_local_folio(folio, i * PAGE_SIZE);
> >   
> >                     __flush_dcache_icache(start);
> >                     kunmap_local(start);

So you'd like this to be:

        } else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) {
                for (i = 0; i < nr; i++) {
                         void *start = kmap_local_folio(folio, i * PAGE_SIZE);
                         __flush_dcache_icache(start);
                         kunmap_local(start);
                }
        } else {
                unsigned long pfn = folio_pfn(folio);
                for (i = 0; i < nr; i++)
                        flush_dcache_icache_phys((pfn + i) * PAGE_SIZE);
        }

(or maybe you'd prefer a flush_dcache_icache_pfn() that doesn't need to
worry about PAGE_MASK).

> > @@ -166,27 +124,6 @@ static void flush_dcache_icache_hugepage(struct page 
> > *page)
> >     }
> >   }
> >   
> > -void flush_dcache_icache_page(struct page *page)
> > -{
> > -   if (flush_coherent_icache())
> > -           return;
> > -
> > -   if (PageCompound(page))
> > -           return flush_dcache_icache_hugepage(page);
> > -
> > -   if (!PageHighMem(page)) {
> > -           __flush_dcache_icache(lowmem_page_address(page));
> > -   } else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > 
> > sizeof(void *)) {
> > -           void *start = kmap_local_page(page);
> > -
> > -           __flush_dcache_icache(start);
> > -           kunmap_local(start);
> > -   } else {
> > -           flush_dcache_icache_phys(page_to_phys(page));
> > -   }
> > -}
> > -EXPORT_SYMBOL(flush_dcache_icache_page);
> > -
> >   void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
> >   {
> >     clear_page(page);
> > diff --git a/arch/powerpc/mm/nohash/e500_hugetlbpage.c 
> > b/arch/powerpc/mm/nohash/e500_hugetlbpage.c
> > index 58c8d9849cb1..f3cb91107a47 100644
> > --- a/arch/powerpc/mm/nohash/e500_hugetlbpage.c
> > +++ b/arch/powerpc/mm/nohash/e500_hugetlbpage.c
> > @@ -178,7 +178,8 @@ book3e_hugetlb_preload(struct vm_area_struct *vma, 
> > unsigned long ea, pte_t pte)
> >    *
> >    * This must always be called with the pte lock held.
> >    */
> > -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, 
> > pte_t *ptep)
> > +void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
> > +           pte_t *ptep, unsigned int nr)
> >   {
> >     if (is_vm_hugetlb_page(vma))
> >             book3e_hugetlb_preload(vma, address, *ptep);
> > diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
> > index cb2dcdb18f8e..b3c7b874a7a2 100644
> > --- a/arch/powerpc/mm/pgtable.c
> > +++ b/arch/powerpc/mm/pgtable.c
> > @@ -58,7 +58,7 @@ static inline int pte_looks_normal(pte_t pte)
> >     return 0;
> >   }
> >   
> > -static struct page *maybe_pte_to_page(pte_t pte)
> > +static struct folio *maybe_pte_to_folio(pte_t pte)
> >   {
> >     unsigned long pfn = pte_pfn(pte);
> >     struct page *page;
> > @@ -68,7 +68,7 @@ static struct page *maybe_pte_to_page(pte_t pte)
> >     page = pfn_to_page(pfn);
> >     if (PageReserved(page))
> >             return NULL;
> > -   return page;
> > +   return page_folio(page);
> >   }
> >   
> >   #ifdef CONFIG_PPC_BOOK3S
> > @@ -84,12 +84,12 @@ static pte_t set_pte_filter_hash(pte_t pte)
> >     pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
> >     if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) 
> > ||
> >                                    cpu_has_feature(CPU_FTR_NOEXECUTE))) {
> > -           struct page *pg = maybe_pte_to_page(pte);
> > -           if (!pg)
> > +           struct folio *folio = maybe_pte_to_folio(pte);
> > +           if (!folio)
> >                     return pte;
> > -           if (!test_bit(PG_dcache_clean, &pg->flags)) {
> > -                   flush_dcache_icache_page(pg);
> > -                   set_bit(PG_dcache_clean, &pg->flags);
> > +           if (!test_bit(PG_dcache_clean, &folio->flags)) {
> > +                   flush_dcache_icache_folio(folio);
> > +                   set_bit(PG_dcache_clean, &folio->flags);
> >             }
> >     }
> >     return pte;
> > @@ -107,7 +107,7 @@ static pte_t set_pte_filter_hash(pte_t pte) { return 
> > pte; }
> >    */
> >   static inline pte_t set_pte_filter(pte_t pte)
> >   {
> > -   struct page *pg;
> > +   struct folio *folio;
> >   
> >     if (radix_enabled())
> >             return pte;
> > @@ -120,18 +120,18 @@ static inline pte_t set_pte_filter(pte_t pte)
> >             return pte;
> >   
> >     /* If you set _PAGE_EXEC on weird pages you're on your own */
> > -   pg = maybe_pte_to_page(pte);
> > -   if (unlikely(!pg))
> > +   folio = maybe_pte_to_folio(pte);
> > +   if (unlikely(!folio))
> >             return pte;
> >   
> >     /* If the page clean, we move on */
> > -   if (test_bit(PG_dcache_clean, &pg->flags))
> > +   if (test_bit(PG_dcache_clean, &folio->flags))
> >             return pte;
> >   
> >     /* If it's an exec fault, we flush the cache and make it clean */
> >     if (is_exec_fault()) {
> > -           flush_dcache_icache_page(pg);
> > -           set_bit(PG_dcache_clean, &pg->flags);
> > +           flush_dcache_icache_folio(folio);
> > +           set_bit(PG_dcache_clean, &folio->flags);
> >             return pte;
> >     }
> >   
> > @@ -142,7 +142,7 @@ static inline pte_t set_pte_filter(pte_t pte)
> >   static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct 
> > *vma,
> >                                  int dirty)
> >   {
> > -   struct page *pg;
> > +   struct folio *folio;
> >   
> >     if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
> >             return pte;
> > @@ -168,17 +168,17 @@ static pte_t set_access_flags_filter(pte_t pte, 
> > struct vm_area_struct *vma,
> >   #endif /* CONFIG_DEBUG_VM */
> >   
> >     /* If you set _PAGE_EXEC on weird pages you're on your own */
> > -   pg = maybe_pte_to_page(pte);
> > -   if (unlikely(!pg))
> > +   folio = maybe_pte_to_folio(pte);
> > +   if (unlikely(!folio))
> >             goto bail;
> >   
> >     /* If the page is already clean, we move on */
> > -   if (test_bit(PG_dcache_clean, &pg->flags))
> > +   if (test_bit(PG_dcache_clean, &folio->flags))
> >             goto bail;
> >   
> >     /* Clean the page and set PG_dcache_clean */
> > -   flush_dcache_icache_page(pg);
> > -   set_bit(PG_dcache_clean, &pg->flags);
> > +   flush_dcache_icache_folio(folio);
> > +   set_bit(PG_dcache_clean, &folio->flags);
> >   
> >    bail:
> >     return pte_mkexec(pte);
> > @@ -187,8 +187,8 @@ static pte_t set_access_flags_filter(pte_t pte, struct 
> > vm_area_struct *vma,
> >   /*
> >    * set_pte stores a linux PTE into the linux page table.
> >    */
> > -void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
> > -           pte_t pte)
> > +void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
> > +           pte_t pte, unsigned int nr)
> >   {
> >     /*
> >      * Make sure hardware valid bit is not set. We don't do
> > @@ -203,7 +203,14 @@ void set_pte_at(struct mm_struct *mm, unsigned long 
> > addr, pte_t *ptep,
> >     pte = set_pte_filter(pte);
> >   
> >     /* Perform the setting of the PTE */
> > -   __set_pte_at(mm, addr, ptep, pte, 0);
> > +   for (;;) {
> > +           __set_pte_at(mm, addr, ptep, pte, 0);
> > +           if (--nr == 0)
> > +                   break;
> > +           ptep++;
> > +           pte = __pte(pte_val(pte) + PAGE_SIZE);
> > +           addr += PAGE_SIZE;
> > +   }
> >   }
> >   
> >   void unmap_kernel_page(unsigned long va)

Reply via email to