[PATCH -mm -V3 05/21] mm, THP, swap: Support PMD swap mapping in free_swap_and_cache()/swap_free()

2018-05-23 Thread Huang, Ying
From: Huang Ying 

When a PMD swap mapping is removed from a huge swap cluster, for
example, unmap a memory range mapped with PMD swap mapping, etc,
free_swap_and_cache() will be called to decrease the reference count
to the huge swap cluster.  free_swap_and_cache() may also free or
split the huge swap cluster, and free the corresponding THP in swap
cache if necessary.  swap_free() is similar, and shares most
implementation with free_swap_and_cache().  This patch revises
free_swap_and_cache() and swap_free() to implement this.

If the swap cluster has been split already, for example, because of
failing to allocate a THP during swapin, we just decrease one from the
reference count of all swap slots.

Otherwise, we will decrease one from the reference count of all swap
slots and the PMD swap mapping count in cluster_count().  When the
corresponding THP isn't in swap cache, if PMD swap mapping count
becomes 0, the huge swap cluster will be split, and if all swap count
becomes 0, the huge swap cluster will be freed.  When the corresponding
THP is in swap cache, if every swap_map[offset] == SWAP_HAS_CACHE, we
will try to delete the THP from swap cache.  Which will cause the THP
and the huge swap cluster be freed.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
---
 arch/s390/mm/pgtable.c |   2 +-
 include/linux/swap.h   |   9 ++--
 kernel/power/swap.c|   4 +-
 mm/madvise.c   |   2 +-
 mm/memory.c|   4 +-
 mm/shmem.c |   6 +--
 mm/swapfile.c  | 114 +++--
 7 files changed, 116 insertions(+), 25 deletions(-)

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 301e466e4263..3079a23eef75 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -646,7 +646,7 @@ static void ptep_zap_swap_entry(struct mm_struct *mm, 
swp_entry_t entry)
 
dec_mm_counter(mm, mm_counter(page));
}
-   free_swap_and_cache(entry);
+   free_swap_and_cache(entry, false);
 }
 
 void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 57aa655ab27d..7ed2c727c9b6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -453,9 +453,9 @@ extern int add_swap_count_continuation(swp_entry_t, gfp_t);
 extern void swap_shmem_alloc(swp_entry_t);
 extern int swap_duplicate(swp_entry_t *entry, bool cluster);
 extern int swapcache_prepare(swp_entry_t entry, bool cluster);
-extern void swap_free(swp_entry_t);
+extern void swap_free(swp_entry_t entry, bool cluster);
 extern void swapcache_free_entries(swp_entry_t *entries, int n);
-extern int free_swap_and_cache(swp_entry_t);
+extern int free_swap_and_cache(swp_entry_t entry, bool cluster);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
 extern sector_t map_swap_page(struct page *, struct block_device **);
@@ -509,7 +509,8 @@ static inline void show_swap_cache_info(void)
 {
 }
 
-#define free_swap_and_cache(e) ({(is_migration_entry(e) || 
is_device_private_entry(e));})
+#define free_swap_and_cache(e, c)  \
+   ({(is_migration_entry(e) || is_device_private_entry(e)); })
 #define swapcache_prepare(e, c)
\
({(is_migration_entry(e) || is_device_private_entry(e)); })
 
@@ -527,7 +528,7 @@ static inline int swap_duplicate(swp_entry_t *swp, bool 
cluster)
return 0;
 }
 
-static inline void swap_free(swp_entry_t swp)
+static inline void swap_free(swp_entry_t swp, bool cluster)
 {
 }
 
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 1efcb5b0c3ed..f8b4d6df73fd 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -182,7 +182,7 @@ sector_t alloc_swapdev_block(int swap)
offset = swp_offset(get_swap_page_of_type(swap));
if (offset) {
if (swsusp_extents_insert(offset))
-   swap_free(swp_entry(swap, offset));
+   swap_free(swp_entry(swap, offset), false);
else
return swapdev_block(swap, offset);
}
@@ -206,7 +206,7 @@ void free_all_swap_pages(int swap)
ext = rb_entry(node, struct swsusp_extent, node);
rb_erase(node, _extents);
for (offset = ext->start; offset <= ext->end; offset++)
-   swap_free(swp_entry(swap, offset));
+   

[PATCH -mm -V3 05/21] mm, THP, swap: Support PMD swap mapping in free_swap_and_cache()/swap_free()

2018-05-23 Thread Huang, Ying
From: Huang Ying 

When a PMD swap mapping is removed from a huge swap cluster, for
example, unmap a memory range mapped with PMD swap mapping, etc,
free_swap_and_cache() will be called to decrease the reference count
to the huge swap cluster.  free_swap_and_cache() may also free or
split the huge swap cluster, and free the corresponding THP in swap
cache if necessary.  swap_free() is similar, and shares most
implementation with free_swap_and_cache().  This patch revises
free_swap_and_cache() and swap_free() to implement this.

If the swap cluster has been split already, for example, because of
failing to allocate a THP during swapin, we just decrease one from the
reference count of all swap slots.

Otherwise, we will decrease one from the reference count of all swap
slots and the PMD swap mapping count in cluster_count().  When the
corresponding THP isn't in swap cache, if PMD swap mapping count
becomes 0, the huge swap cluster will be split, and if all swap count
becomes 0, the huge swap cluster will be freed.  When the corresponding
THP is in swap cache, if every swap_map[offset] == SWAP_HAS_CACHE, we
will try to delete the THP from swap cache.  Which will cause the THP
and the huge swap cluster be freed.

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
---
 arch/s390/mm/pgtable.c |   2 +-
 include/linux/swap.h   |   9 ++--
 kernel/power/swap.c|   4 +-
 mm/madvise.c   |   2 +-
 mm/memory.c|   4 +-
 mm/shmem.c |   6 +--
 mm/swapfile.c  | 114 +++--
 7 files changed, 116 insertions(+), 25 deletions(-)

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 301e466e4263..3079a23eef75 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -646,7 +646,7 @@ static void ptep_zap_swap_entry(struct mm_struct *mm, 
swp_entry_t entry)
 
dec_mm_counter(mm, mm_counter(page));
}
-   free_swap_and_cache(entry);
+   free_swap_and_cache(entry, false);
 }
 
 void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 57aa655ab27d..7ed2c727c9b6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -453,9 +453,9 @@ extern int add_swap_count_continuation(swp_entry_t, gfp_t);
 extern void swap_shmem_alloc(swp_entry_t);
 extern int swap_duplicate(swp_entry_t *entry, bool cluster);
 extern int swapcache_prepare(swp_entry_t entry, bool cluster);
-extern void swap_free(swp_entry_t);
+extern void swap_free(swp_entry_t entry, bool cluster);
 extern void swapcache_free_entries(swp_entry_t *entries, int n);
-extern int free_swap_and_cache(swp_entry_t);
+extern int free_swap_and_cache(swp_entry_t entry, bool cluster);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
 extern sector_t map_swap_page(struct page *, struct block_device **);
@@ -509,7 +509,8 @@ static inline void show_swap_cache_info(void)
 {
 }
 
-#define free_swap_and_cache(e) ({(is_migration_entry(e) || 
is_device_private_entry(e));})
+#define free_swap_and_cache(e, c)  \
+   ({(is_migration_entry(e) || is_device_private_entry(e)); })
 #define swapcache_prepare(e, c)
\
({(is_migration_entry(e) || is_device_private_entry(e)); })
 
@@ -527,7 +528,7 @@ static inline int swap_duplicate(swp_entry_t *swp, bool 
cluster)
return 0;
 }
 
-static inline void swap_free(swp_entry_t swp)
+static inline void swap_free(swp_entry_t swp, bool cluster)
 {
 }
 
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 1efcb5b0c3ed..f8b4d6df73fd 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -182,7 +182,7 @@ sector_t alloc_swapdev_block(int swap)
offset = swp_offset(get_swap_page_of_type(swap));
if (offset) {
if (swsusp_extents_insert(offset))
-   swap_free(swp_entry(swap, offset));
+   swap_free(swp_entry(swap, offset), false);
else
return swapdev_block(swap, offset);
}
@@ -206,7 +206,7 @@ void free_all_swap_pages(int swap)
ext = rb_entry(node, struct swsusp_extent, node);
rb_erase(node, _extents);
for (offset = ext->start; offset <= ext->end; offset++)
-   swap_free(swp_entry(swap, offset));
+   swap_free(swp_entry(swap, offset), false);
 
kfree(ext);
}
diff --git a/mm/madvise.c b/mm/madvise.c
index 4d3c922ea1a1..d18c626b 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -349,7 +349,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long 
addr,