From: Huang Ying <ying.hu...@intel.com>

This patch enhances swap writing to support writing a THP
(Transparent Huge Page) as a whole.  This is part of the THP swap
optimization and improves swap write IO performance, because the
resulting IOs are larger and contiguous.

Signed-off-by: "Huang, Ying" <ying.hu...@intel.com>
Cc: Johannes Weiner <han...@cmpxchg.org>
Cc: Minchan Kim <minc...@kernel.org>
Cc: Hugh Dickins <hu...@google.com>
Cc: Shaohua Li <s...@kernel.org>
Cc: Rik van Riel <r...@redhat.com>
Cc: Jens Axboe <ax...@fb.com>
---
 include/linux/page-flags.h    |  4 ++--
 include/linux/vm_event_item.h |  1 +
 mm/page_io.c                  | 21 ++++++++++++++++-----
 mm/vmstat.c                   |  1 +
 4 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index d33e3280c8ad..ba2d470d2d0a 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -303,8 +303,8 @@ PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY)
  * Only test-and-set exist for PG_writeback.  The unconditional operators are
  * risky: they bypass page accounting.
  */
-TESTPAGEFLAG(Writeback, writeback, PF_NO_COMPOUND)
-       TESTSCFLAG(Writeback, writeback, PF_NO_COMPOUND)
+TESTPAGEFLAG(Writeback, writeback, PF_NO_TAIL)
+       TESTSCFLAG(Writeback, writeback, PF_NO_TAIL)
 PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL)
 
 /* PG_readahead is only used for reads; PG_reclaim is only for writes */
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index d84ae90ccd5c..5b5b0f094060 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -84,6 +84,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #endif
                THP_ZERO_PAGE_ALLOC,
                THP_ZERO_PAGE_ALLOC_FAILED,
+               THP_SWPOUT,
 #endif
 #ifdef CONFIG_MEMORY_BALLOON
                BALLOON_INFLATE,
diff --git a/mm/page_io.c b/mm/page_io.c
index 23f6d0d3470f..ec5229fb3607 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -27,16 +27,18 @@
 static struct bio *get_swap_bio(gfp_t gfp_flags,
                                struct page *page, bio_end_io_t end_io)
 {
+       int i, nr = hpage_nr_pages(page);
        struct bio *bio;
 
-       bio = bio_alloc(gfp_flags, 1);
+       bio = bio_alloc(gfp_flags, nr);
        if (bio) {
                bio->bi_iter.bi_sector = map_swap_page(page, &bio->bi_bdev);
                bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
                bio->bi_end_io = end_io;
 
-               bio_add_page(bio, page, PAGE_SIZE, 0);
-               BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE);
+               for (i = 0; i < nr; i++)
+                       bio_add_page(bio, page + i, PAGE_SIZE, 0);
+               VM_BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE * nr);
        }
        return bio;
 }
@@ -257,6 +259,15 @@ static sector_t swap_page_sector(struct page *page)
        return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9);
 }
 
+static inline void count_swpout_vm_event(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       if (unlikely(PageTransHuge(page)))
+               count_vm_event(THP_SWPOUT);
+#endif
+       count_vm_events(PSWPOUT, hpage_nr_pages(page));
+}
+
 int __swap_writepage(struct page *page, struct writeback_control *wbc,
                bio_end_io_t end_write_func)
 {
@@ -308,7 +319,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 
        ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
        if (!ret) {
-               count_vm_event(PSWPOUT);
+               count_swpout_vm_event(page);
                return 0;
        }
 
@@ -321,7 +332,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
                goto out;
        }
        bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
-       count_vm_event(PSWPOUT);
+       count_swpout_vm_event(page);
        set_page_writeback(page);
        unlock_page(page);
        submit_bio(bio);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c432e581f9a9..ebfd79df1008 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1070,6 +1070,7 @@ const char * const vmstat_text[] = {
 #endif
        "thp_zero_page_alloc",
        "thp_zero_page_alloc_failed",
+       "thp_swpout",
 #endif
 #ifdef CONFIG_MEMORY_BALLOON
        "balloon_inflate",
-- 
2.11.0

Reply via email to