Hi.

On Wed, Nov 29, 2023 at 01:33:37PM +0000, Alexandru Elisei wrote:
> Hi,
> 
> On Wed, Nov 29, 2023 at 06:10:40PM +0900, Hyesoo Yu wrote:
> > On Sun, Nov 19, 2023 at 04:57:10PM +0000, Alexandru Elisei wrote:
> > > [..]
> > > +static int order_to_num_blocks(int order)
> > > +{
> > > + return max((1 << order) / 32, 1);
> > > +}
> > > [..]
> > > +int reserve_tag_storage(struct page *page, int order, gfp_t gfp)
> > > +{
> > > + unsigned long start_block, end_block;
> > > + struct tag_region *region;
> > > + unsigned long block;
> > > + unsigned long flags;
> > > + unsigned int tries;
> > > + int ret = 0;
> > > +
> > > + VM_WARN_ON_ONCE(!preemptible());
> > > +
> > > + if (page_tag_storage_reserved(page))
> > > +         return 0;
> > > +
> > > + /*
> > > +  * __alloc_contig_migrate_range() ignores gfp when allocating the
> > > +  * destination page for migration. Regardless, massage gfp flags and
> > > +  * remove __GFP_TAGGED to avoid recursion in case gfp stops being
> > > +  * ignored.
> > > +  */
> > > + gfp &= ~__GFP_TAGGED;
> > > + if (!(gfp & __GFP_NORETRY))
> > > +         gfp |= __GFP_RETRY_MAYFAIL;
> > > +
> > > + ret = tag_storage_find_block(page, &start_block, &region);
> > > + if (WARN_ONCE(ret, "Missing tag storage block for pfn 0x%lx", 
> > > page_to_pfn(page)))
> > > +         return 0;
> > > + end_block = start_block + order_to_num_blocks(order) * 
> > > region->block_size;
> > > +
> > 
> > Hello.
> > 
> > If the page size is 4K,  block size is 2 (block size bytes 8K), and order 
> > is 6,
> > then we need 2 pages for the tag. However according to the equation, 
> > order_to_num_blocks
> > is 2 and block_size is also 2, so end block will be incremented by 4.
> > 
> > However we actually only need 8K of tag, right for 256K ?
> > Could you explain order_to_num_blocks * region->block_size more detail ?
> 
> I think you are correct, thank you for pointing it out. The formula should
> probably be something like:
> 
> static int order_to_num_blocks(int order, u32 block_size)
> {
>       int num_tag_pages = max((1 << order) / 32, 1);
> 
>       return DIV_ROUND_UP(num_tag_pages, block_size);
> }
> 
> and that will make end_block = start_block + 2 in your scenario.
> 
> Does that look correct to you?
> 
> Thanks,
> Alex
> 

That looks great!

Thanks,
Regards.

> > 
> > Thanks,
> > Regards.
> > 
> > > + mutex_lock(&tag_blocks_lock);
> > > +
> > > + /* Check again, this time with the lock held. */
> > > + if (page_tag_storage_reserved(page))
> > > +         goto out_unlock;
> > > +
> > > + /* Make sure existing entries are not freed from out under out feet. */
> > > + xa_lock_irqsave(&tag_blocks_reserved, flags);
> > > + for (block = start_block; block < end_block; block += 
> > > region->block_size) {
> > > +         if (tag_storage_block_is_reserved(block))
> > > +                 block_ref_add(block, region, order);
> > > + }
> > > + xa_unlock_irqrestore(&tag_blocks_reserved, flags);
> > > +
> > > + for (block = start_block; block < end_block; block += 
> > > region->block_size) {
> > > +         /* Refcount incremented above. */
> > > +         if (tag_storage_block_is_reserved(block))
> > > +                 continue;
> > > +
> > > +         tries = 3;
> > > +         while (tries--) {
> > > +                 ret = alloc_contig_range(block, block + 
> > > region->block_size, MIGRATE_CMA, gfp);
> > > +                 if (ret == 0 || ret != -EBUSY)
> > > +                         break;
> > > +         }
> > > +
> > > +         if (ret)
> > > +                 goto out_error;
> > > +
> > > +         ret = tag_storage_reserve_block(block, region, order);
> > > +         if (ret) {
> > > +                 free_contig_range(block, region->block_size);
> > > +                 goto out_error;
> > > +         }
> > > +
> > > +         count_vm_events(CMA_ALLOC_SUCCESS, region->block_size);
> > > + }
> > > +
> > > + page_set_tag_storage_reserved(page, order);
> > > +out_unlock:
> > > + mutex_unlock(&tag_blocks_lock);
> > > +
> > > + return 0;
> > > +
> > > +out_error:
> > > + xa_lock_irqsave(&tag_blocks_reserved, flags);
> > > + for (block = start_block; block < end_block; block += 
> > > region->block_size) {
> > > +         if (tag_storage_block_is_reserved(block) &&
> > > +             block_ref_sub_return(block, region, order) == 1) {
> > > +                 __xa_erase(&tag_blocks_reserved, block);
> > > +                 free_contig_range(block, region->block_size);
> > > +         }
> > > + }
> > > + xa_unlock_irqrestore(&tag_blocks_reserved, flags);
> > > +
> > > + mutex_unlock(&tag_blocks_lock);
> > > +
> > > + count_vm_events(CMA_ALLOC_FAIL, region->block_size);
> > > +
> > > + return ret;
> > > +}
> > > +
> > > +void free_tag_storage(struct page *page, int order)
> > > +{
> > > + unsigned long block, start_block, end_block;
> > > + struct tag_region *region;
> > > + unsigned long flags;
> > > + int ret;
> > > +
> > > + ret = tag_storage_find_block(page, &start_block, &region);
> > > + if (WARN_ONCE(ret, "Missing tag storage block for pfn 0x%lx", 
> > > page_to_pfn(page)))
> > > +         return;
> > > +
> > > + end_block = start_block + order_to_num_blocks(order) * 
> > > region->block_size;
> > > +
> > > + xa_lock_irqsave(&tag_blocks_reserved, flags);
> > > + for (block = start_block; block < end_block; block += 
> > > region->block_size) {
> > > +         if (WARN_ONCE(!tag_storage_block_is_reserved(block),
> > > +             "Block 0x%lx is not reserved for pfn 0x%lx", block, 
> > > page_to_pfn(page)))
> > > +                 continue;
> > > +
> > > +         if (block_ref_sub_return(block, region, order) == 1) {
> > > +                 __xa_erase(&tag_blocks_reserved, block);
> > > +                 free_contig_range(block, region->block_size);
> > > +         }
> > > + }
> > > + xa_unlock_irqrestore(&tag_blocks_reserved, flags);
> > > +}
> > > diff --git a/fs/proc/page.c b/fs/proc/page.c
> > > index 195b077c0fac..e7eb584a9234 100644
> > > --- a/fs/proc/page.c
> > > +++ b/fs/proc/page.c
> > > @@ -221,6 +221,7 @@ u64 stable_page_flags(struct page *page)
> > >  #ifdef CONFIG_ARCH_USES_PG_ARCH_X
> > >   u |= kpf_copy_bit(k, KPF_ARCH_2,        PG_arch_2);
> > >   u |= kpf_copy_bit(k, KPF_ARCH_3,        PG_arch_3);
> > > + u |= kpf_copy_bit(k, KPF_ARCH_4,        PG_arch_4);
> > >  #endif
> > >  
> > >   return u;
> > > diff --git a/include/linux/kernel-page-flags.h 
> > > b/include/linux/kernel-page-flags.h
> > > index 859f4b0c1b2b..4a0d719ffdd4 100644
> > > --- a/include/linux/kernel-page-flags.h
> > > +++ b/include/linux/kernel-page-flags.h
> > > @@ -19,5 +19,6 @@
> > >  #define KPF_SOFTDIRTY            40
> > >  #define KPF_ARCH_2               41
> > >  #define KPF_ARCH_3               42
> > > +#define KPF_ARCH_4               43
> > >  
> > >  #endif /* LINUX_KERNEL_PAGE_FLAGS_H */
> > > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> > > index a88e64acebfe..7915165a51bd 100644
> > > --- a/include/linux/page-flags.h
> > > +++ b/include/linux/page-flags.h
> > > @@ -135,6 +135,7 @@ enum pageflags {
> > >  #ifdef CONFIG_ARCH_USES_PG_ARCH_X
> > >   PG_arch_2,
> > >   PG_arch_3,
> > > + PG_arch_4,
> > >  #endif
> > >   __NR_PAGEFLAGS,
> > >  
> > > diff --git a/include/trace/events/mmflags.h 
> > > b/include/trace/events/mmflags.h
> > > index 6ca0d5ed46c0..ba962fd10a2c 100644
> > > --- a/include/trace/events/mmflags.h
> > > +++ b/include/trace/events/mmflags.h
> > > @@ -125,7 +125,8 @@ IF_HAVE_PG_HWPOISON(hwpoison)                         
> > >                 \
> > >  IF_HAVE_PG_IDLE(idle)                                                    
> > > \
> > >  IF_HAVE_PG_IDLE(young)                                                   
> > > \
> > >  IF_HAVE_PG_ARCH_X(arch_2)                                                
> > > \
> > > -IF_HAVE_PG_ARCH_X(arch_3)
> > > +IF_HAVE_PG_ARCH_X(arch_3)                                                
> > > \
> > > +IF_HAVE_PG_ARCH_X(arch_4)
> > >  
> > >  #define show_page_flags(flags)                                           
> > > \
> > >   (flags) ? __print_flags(flags, "|",                             \
> > > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > > index f31f02472396..9beead961a65 100644
> > > --- a/mm/huge_memory.c
> > > +++ b/mm/huge_memory.c
> > > @@ -2474,6 +2474,7 @@ static void __split_huge_page_tail(struct folio 
> > > *folio, int tail,
> > >  #ifdef CONFIG_ARCH_USES_PG_ARCH_X
> > >                    (1L << PG_arch_2) |
> > >                    (1L << PG_arch_3) |
> > > +                  (1L << PG_arch_4) |
> > >  #endif
> > >                    (1L << PG_dirty) |
> > >                    LRU_GEN_MASK | LRU_REFS_MASK));
> > > -- 
> > > 2.42.1
> > > 
> > > 
> 
> 
> 


Reply via email to