From: Huang Ying
When a PMD swap mapping is removed from a huge swap cluster, for
example, when a memory range mapped with a PMD swap mapping is unmapped,
free_swap_and_cache() will be called to decrease the reference count
of the huge swap cluster. free_swap_and_cache() may also free or
split the huge swap cluster, and free the corresponding THP in the swap
cache if necessary. swap_free() is similar, and shares most of its
implementation with free_swap_and_cache(). This patch revises
free_swap_and_cache() and swap_free() to implement this.
If the swap cluster has already been split, for example, because
allocating a THP failed during swapin, we just decrease the reference
count of every swap slot by one.
Otherwise, we decrease by one both the reference count of every swap
slot and the PMD swap mapping count in cluster_count(). When the
corresponding THP isn't in the swap cache, if the PMD swap mapping count
becomes 0, the huge swap cluster will be split, and if all swap counts
become 0, the huge swap cluster will be freed. When the corresponding
THP is in the swap cache, if every swap_map[offset] == SWAP_HAS_CACHE, we
will try to delete the THP from the swap cache, which will cause the THP
and the huge swap cluster to be freed.
Signed-off-by: "Huang, Ying"
Cc: "Kirill A. Shutemov"
Cc: Andrea Arcangeli
Cc: Michal Hocko
Cc: Johannes Weiner
Cc: Shaohua Li
Cc: Hugh Dickins
Cc: Minchan Kim
Cc: Rik van Riel
Cc: Dave Hansen
Cc: Naoya Horiguchi
Cc: Zi Yan
---
arch/s390/mm/pgtable.c | 2 +-
include/linux/swap.h | 9 ++--
kernel/power/swap.c| 4 +-
mm/madvise.c | 2 +-
mm/memory.c| 4 +-
mm/shmem.c | 6 +--
mm/swapfile.c | 114 +++--
7 files changed, 116 insertions(+), 25 deletions(-)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 301e466e4263..3079a23eef75 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -646,7 +646,7 @@ static void ptep_zap_swap_entry(struct mm_struct *mm,
swp_entry_t entry)
dec_mm_counter(mm, mm_counter(page));
}
- free_swap_and_cache(entry);
+ free_swap_and_cache(entry, false);
}
void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 57aa655ab27d..7ed2c727c9b6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -453,9 +453,9 @@ extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t *entry, bool cluster);
extern int swapcache_prepare(swp_entry_t entry, bool cluster);
-extern void swap_free(swp_entry_t);
+extern void swap_free(swp_entry_t entry, bool cluster);
extern void swapcache_free_entries(swp_entry_t *entries, int n);
-extern int free_swap_and_cache(swp_entry_t);
+extern int free_swap_and_cache(swp_entry_t entry, bool cluster);
extern int swap_type_of(dev_t, sector_t, struct block_device **);
extern unsigned int count_swap_pages(int, int);
extern sector_t map_swap_page(struct page *, struct block_device **);
@@ -509,7 +509,8 @@ static inline void show_swap_cache_info(void)
{
}
-#define free_swap_and_cache(e) ({(is_migration_entry(e) ||
is_device_private_entry(e));})
+#define free_swap_and_cache(e, c) \
+ ({(is_migration_entry(e) || is_device_private_entry(e)); })
#define swapcache_prepare(e, c)
\
({(is_migration_entry(e) || is_device_private_entry(e)); })
@@ -527,7 +528,7 @@ static inline int swap_duplicate(swp_entry_t *swp, bool
cluster)
return 0;
}
-static inline void swap_free(swp_entry_t swp)
+static inline void swap_free(swp_entry_t swp, bool cluster)
{
}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 1efcb5b0c3ed..f8b4d6df73fd 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -182,7 +182,7 @@ sector_t alloc_swapdev_block(int swap)
offset = swp_offset(get_swap_page_of_type(swap));
if (offset) {
if (swsusp_extents_insert(offset))
- swap_free(swp_entry(swap, offset));
+ swap_free(swp_entry(swap, offset), false);
else
return swapdev_block(swap, offset);
}
@@ -206,7 +206,7 @@ void free_all_swap_pages(int swap)
ext = rb_entry(node, struct swsusp_extent, node);
rb_erase(node, &swsusp_extents);
for (offset = ext->start; offset <= ext->end; offset++)
- swap_free(swp_entry(swap, offset));
+ swap_free(swp_entry(swap, offset), false);
kfree(ext);
}
diff --git a/mm/madvise.c b/mm/madvise.c
index 4d3c922ea1a1..d18c626b 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -349,7 +349,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long
addr,