On Wed, Feb 24, 2021 at 05:15:58AM -0800, Andrew Morton wrote:
> On Wed, 24 Feb 2021 01:48:07 -0700 Yu Zhao <yuz...@google.com> wrote:
> 
> > Currently page_lru() uses Page{Active,Unevictable} to determine which
> > lru list a page belongs to. Page{Active,Unevictable} contain
> > compound_head() and therefore page_lru() essentially tests
> > PG_{active,unevictable} against compound_head(page)->flags. Once an
> > lru list is determined, page->lru, rather than
> > compound_head(page)->lru, will be added to or deleted from it.
> > 
> > Though not a bug, having compound_head() in page_lru() increases the
> > size of vmlinux by O(KB) because page_lru() is inlined in many
> > places. And removing compound_head() entirely from
> > Page{Active,Unevictable} may not be the best option (for the moment)
> > either, because there may be other cases that need compound_head().
> > This patch makes page_lru() and __clear_page_lru_flags(), which are
> > used immediately before and after operations on page->lru, test
> > PG_{active,unevictable} directly against page->flags instead.
> 
> Oh geeze.
> 
> > --- a/include/linux/mm_inline.h
> > +++ b/include/linux/mm_inline.h
> > @@ -46,14 +46,12 @@ static __always_inline void __clear_page_lru_flags(struct page *page)
> >  {
> >     VM_BUG_ON_PAGE(!PageLRU(page), page);
> >  
> > -   __ClearPageLRU(page);
> > -
> >     /* this shouldn't happen, so leave the flags to bad_page() */
> > -   if (PageActive(page) && PageUnevictable(page))
> > +   if ((page->flags & (BIT(PG_active) | BIT(PG_unevictable))) ==
> > +       (BIT(PG_active) | BIT(PG_unevictable)))
> >             return;
> 
> This isn't very nice.  At the very least we should have (documented!)
> helper functions for this:

You are right. Now that I look at it again, I s/dislike/hate/ it.

> /* comment goes here */
> static inline bool RawPageActive(struct page *page)
> {
>       ...
> }
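
Filled in, it might look like this -- a minimal sketch, assuming all we
want is the raw test against page->flags, and leaving out the
PagePoisoned() check for brevity (the name is just your placeholder):

/*
 * Test PG_active directly against page->flags, without calling
 * compound_head(). Only valid when the caller knows @page is the
 * page that actually carries the flag, e.g., a page on an lru list.
 */
static __always_inline bool RawPageActive(struct page *page)
{
	return test_bit(PG_active, &page->flags);
}
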
> 
> 
> 
> However.
> 
> Here's what the preprocessor produces for an allmodconfig version of
> PageActive():
> 
> static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) 
> __attribute__((no_instrument_function)) __attribute__((__always_inline__)) 
> int PageActive(struct page *page)
> {
>       return test_bit(PG_active, &({ do { if 
> (__builtin_expect(!!(PagePoisoned(compound_head(page))), 0)) { 
> dump_page(compound_head(page), "VM_BUG_ON_PAGE(" 
> "PagePoisoned(compound_head(page))"")"); do { ({ asm volatile("%c0: nop\n\t" 
> ".pushsection .discard.instr_begin\n\t" ".long %c0b - .\n\t" 
> ".popsection\n\t" : : "i" (373)); }); do { asm volatile("1:\t" ".byte 0x0f, 
> 0x0b" "\n" ".pushsection __bug_table,\"aw\"\n" "2:\t" ".long " "1b" " - 2b" 
> "\t# bug_entry::bug_addr\n" "\t" ".long " "%c0" " - 2b" "\t# 
> bug_entry::file\n" "\t.word %c1" "\t# bug_entry::line\n" "\t.word %c2" "\t# 
> bug_entry::flags\n" "\t.org 2b+%c3\n" ".popsection" : : "i" 
> ("./include/linux/page-flags.h"), "i" (338), "i" (0), "i" (sizeof(struct 
> bug_entry))); } while (0); do { ({ asm volatile("%c0:\n\t" ".pushsection 
> .discard.unreachable\n\t" ".long %c0b - .\n\t" ".popsection\n\t" : : "i" 
> (374)); }); asm volatile(""); __builtin_unreachable(); } while (0); } while 
> (0); } } while (0); compound_head(page); })->flags);
> 
> }
> 
> That's all to test a single bit!

I hear you. Let me spend a couple of days focusing on PG_{lru,active,
unevictable,swapbacked} first. They are mostly used with lru-related
operations and can therefore easily be switched to a compound_head()-free
policy. My estimate is we could save ~8KB by doing so :)
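
For instance, page_lru() could then test the raw bits like below. This
is just a sketch against the current lru_list enum, not the final patch:

static __always_inline enum lru_list page_lru(struct page *page)
{
	unsigned long flags = page->flags;

	if (flags & BIT(PG_unevictable))
		return LRU_UNEVICTABLE;

	/* open-coded page_is_file_lru(), minus the compound_head() */
	if (flags & BIT(PG_swapbacked))
		return (flags & BIT(PG_active)) ?
		       LRU_ACTIVE_ANON : LRU_INACTIVE_ANON;

	return (flags & BIT(PG_active)) ?
	       LRU_ACTIVE_FILE : LRU_INACTIVE_FILE;
}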

Weaning off compound_head() completely is a larger commitment that
neither I nor Alex is willing to make at the moment -- I did suggest
it to him last night when I asked him to help test the build with GCC,
which is their default compiler (we've switched to Clang).

Another good point he raised is that they did see a slowdown on ARM64
after compound_head() was first introduced. My point is that there may
be a measurable performance benefit too if we can get rid of those
excessive calls to compound_head(). And I'd be happy to work with
anybody who is interested in doing this.

Fair?

> Four calls to compound_head().
> 
> Compiling this:
> 
> int wibble(struct page *page)
> {
>       return PageActive(page);
> }
> 
> 
> produces the assembly output below (allmodconfig build), and it
> appears that the compiler did not CSE these calls.  Perhaps it would
> be beneficial to give it a bit of help.
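
One way to give it that help might be to hoist compound_head() manually
and test the raw bit once we have the head page. A sketch, with the
poisoned-page check dropped for brevity:

int wibble(struct page *page)
{
	/* look up the head page once, not once per flag test */
	struct page *head = compound_head(page);

	return test_bit(PG_active, &head->flags);
}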

Another interesting thing I've noticed is that the following change
from patch 10 also makes vmlinux a couple of hundred bytes larger with
my GCC 4.9.x.

-unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx)
+static unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru,
+                                    int zone_idx)

> This is all nuts.  How much of this inlining is really justifiable?  Do
> we know we wouldn't get a better kernel if we did
> 
>       mv mm-inline.h mm-not-inline-any-more.c
> 
> ?
> 
> Methinks that mm-inline.c needs some serious work...

Agreed. I'll send another series of patches on top of the lru cleanup
series this week.

>       .type   wibble, @function
> wibble:
> 1:    call    __fentry__
>       .section __mcount_loc, "a",@progbits
>       .quad 1b
>       .previous
>       pushq   %r12    #
>       pushq   %rbp    #
>       pushq   %rbx    #
> # mm/swap.c:1156: {
>       movq    %rdi, %rbx      # page, page
>       movq    %rbx, %rbp      # page, _14
> # ./include/linux/page-flags.h:184:   unsigned long head = READ_ONCE(page->compound_head);
>       call    __sanitizer_cov_trace_pc        #
>       movq    8(%rbx), %r12   # page_2(D)->D.14210.D.14188.compound_head, _8
> # ./include/linux/page-flags.h:186:   if (unlikely(head & 1))
>       testb   $1, %r12b       #, _8
>       je      .L2945  #,
> # ./include/linux/page-flags.h:187:           return (struct page *) (head - 1);
>       call    __sanitizer_cov_trace_pc        #
>       leaq    -1(%r12), %rbp  #, _14
>       jmp     .L2945  #
> .L2945:
>       call    __sanitizer_cov_trace_pc        #
> # ./include/linux/page-flags.h:338: PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
>       cmpq    $-1, 0(%rbp)    #, MEM[(long unsigned int *)_15]
>       jne     .L2946  #,
> # ./include/linux/page-flags.h:184:   unsigned long head = READ_ONCE(page->compound_head);
>       call    __sanitizer_cov_trace_pc        #
>       movq    8(%rbx), %rbp   #, _16
> # ./include/linux/page-flags.h:186:   if (unlikely(head & 1))
>       testb   $1, %bpl        #, _16
>       je      .L2947  #,
> # ./include/linux/page-flags.h:187:           return (struct page *) (head - 1);
>       leaq    -1(%rbp), %rbx  #, page
>       call    __sanitizer_cov_trace_pc        #
> .L2947:
> # ./include/linux/page-flags.h:338: PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
>       call    __sanitizer_cov_trace_pc        #
>       movq    $.LC20, %rsi    #,
>       movq    %rbx, %rdi      # page,
>       call    dump_page       #
> #APP
> # 338 "./include/linux/page-flags.h" 1
>       373: nop        #
>       .pushsection .discard.instr_begin
>       .long 373b - .  #
>       .popsection
>       
> # 0 "" 2
> # 338 "./include/linux/page-flags.h" 1
>       1:      .byte 0x0f, 0x0b
> .pushsection __bug_table,"aw"
> 2:    .long 1b - 2b   # bug_entry::bug_addr
>       .long .LC3 - 2b # bug_entry::file       #
>       .word 338       # bug_entry::line       #
>       .word 0 # bug_entry::flags      #
>       .org 2b+12      #
> .popsection
> # 0 "" 2
> # 338 "./include/linux/page-flags.h" 1
>       374:    #
>       .pushsection .discard.unreachable
>       .long 374b - .  #
>       .popsection
>       
> # 0 "" 2
> #NO_APP
> .L2946:
> # ./include/linux/page-flags.h:184:   unsigned long head = READ_ONCE(page->compound_head);
>       call    __sanitizer_cov_trace_pc        #
>       movq    8(%rbx), %rbp   #, _28
> # ./include/linux/page-flags.h:186:   if (unlikely(head & 1))
>       testb   $1, %bpl        #, _28
>       je      .L2948  #,
> # ./include/linux/page-flags.h:187:           return (struct page *) (head - 1);
>       leaq    -1(%rbp), %rbx  #, page
>       call    __sanitizer_cov_trace_pc        #
> .L2948:
> # ./arch/x86/include/asm/bitops.h:207:                (addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
>       call    __sanitizer_cov_trace_pc        #
>       movq    (%rbx), %rax    # MEM[(const long unsigned int *)_35], _24
> # mm/swap.c:1158: }
>       popq    %rbx    #
>       popq    %rbp    #
> # ./arch/x86/include/asm/bitops.h:207:                (addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
>       shrq    $5, %rax        #, tmp107
> # ./include/linux/page-flags.h:338: PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
>       andl    $1, %eax        #, tmp106
> # mm/swap.c:1158: }
>       popq    %r12    #
>       ret
> 
