[PATCH 1/3] mm/swap: support per memory cgroup swapfiles

2014-03-21 Thread Yu Zhao
From: Suleiman Souhlal 

This patch adds support for per memory cgroup swap file. The swap file
is marked private in swapon() with a new flag SWAP_FLAG_PRIVATE because
only the memory cgroup (and its children) that owns it can use it (in
the case of the children that don't own any swap files, they go up the
hierarchy until someone who has swap file set up is found).

The path of the swap file is set by writing to memory.swapfile. Details
of the API can be found in Documentation/cgroups/memory.txt.

Signed-off-by: Suleiman Souhlal 
Signed-off-by: Yu Zhao 
---
 Documentation/cgroups/memory.txt |  15 +++
 include/linux/memcontrol.h   |   2 +
 include/linux/swap.h |  38 +++---
 mm/memcontrol.c  |  76 
 mm/memory.c  |   3 +-
 mm/shmem.c   |   2 +-
 mm/swap_state.c  |   2 +-
 mm/swapfile.c| 241 ++-
 mm/vmscan.c  |   2 +-
 9 files changed, 331 insertions(+), 50 deletions(-)

diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 2622115..48a98ad 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -72,6 +72,7 @@ Brief summary of control files.
  memory.move_charge_at_immigrate # set/show controls of moving charges
  memory.oom_control # set/show oom controls.
  memory.numa_stat   # show the number of memory usage per numa node
+ memory.swapfile   # set/show swap file
 
  memory.kmem.limit_in_bytes  # set/show hard limit for kernel memory
  memory.kmem.usage_in_bytes  # show current kernel memory allocation
@@ -342,6 +343,20 @@ set:
 admin a unified view of memory, and it is also useful for people who just
 want to track kernel memory usage.
 
+2.8 Private swap files
+
+It's possible to configure a cgroup to swap to a particular file by using
+memory.swapfile.
+
+A value of "default" in memory.swapfile indicates that this cgroup should
+use the default, system-wide, swap files. A value of "none" indicates that
+this cgroup should never swap. Other values are interpreted as the path
+to a private swap file.
+
+The swap file has to be created and swapon() has to be done on it with
+SWAP_FLAG_PRIVATE, before it can be used. This flag ensures that the swap
+file is private and does not get used by others.
+
 3. User Interface
 
 0. Configuration
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index abd0113..ec4879b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -155,6 +155,8 @@ static inline bool task_in_memcg_oom(struct task_struct *p)
 }
 
 bool mem_cgroup_oom_synchronize(bool wait);
+int mem_cgroup_get_page_swap_type(struct page *page);
+void mem_cgroup_remove_swapfile(int type);
 
 #ifdef CONFIG_MEMCG_SWAP
 extern int do_swap_account;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 46ba0c6..b6a280e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -23,10 +23,11 @@ struct bio;
 #define SWAP_FLAG_DISCARD  0x1 /* enable discard for swap */
 #define SWAP_FLAG_DISCARD_ONCE 0x2 /* discard swap area at swapon-time */
 #define SWAP_FLAG_DISCARD_PAGES 0x4 /* discard page-clusters after use */
+#define SWAP_FLAG_PRIVATE 0x100 /* set if get_swap_page should skip */
 
 #define SWAP_FLAGS_VALID   (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \
 SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \
-SWAP_FLAG_DISCARD_PAGES)
+SWAP_FLAG_DISCARD_PAGES | SWAP_FLAG_PRIVATE)
 
 static inline int current_is_kswapd(void)
 {
@@ -158,8 +159,14 @@ enum {
SWP_FILE= (1 << 7), /* set after swap_activate success */
SWP_AREA_DISCARD = (1 << 8),/* single-time swap area discards */
SWP_PAGE_DISCARD = (1 << 9),/* freed swap page-cluster discards */
+   SWP_PRIVATE = (1 << 10),/* not for general use */
/* add others here before... */
-   SWP_SCANNING= (1 << 10),/* refcount in scan_swap_map */
+   SWP_SCANNING= (1 << 11),/* refcount in scan_swap_map */
+};
+
+enum {
+   SWAP_TYPE_DEFAULT = -1, /* use default/global/system swap file */
+   SWAP_TYPE_NONE = -2,/* swap is disabled */
 };
 
 #define SWAP_CLUSTER_MAX 32UL
@@ -401,22 +408,19 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
struct vm_area_struct *vma, unsigned long addr);
 
 /* linux/mm/swapfile.c */
-extern atomic_long_t nr_swap_pages;
-extern long total_swap_pages;
-
-/* Swap 50% full? Release swapcache more aggressively.. */
-static inline bool vm_swap_full(void)
-{
-   return atomic_long_read(&nr_swap_pages) * 2 < total_swap_pages;
-}
-
+extern bool vm_swap_full(struct page *page);
+extern atomic_long_t nr_public_swap_pages, 

[PATCH 1/3] mm/swap: support per memory cgroup swapfiles

2014-03-21 Thread Yu Zhao
From: Suleiman Souhlal sulei...@google.com

This patch adds support for per memory cgroup swap file. The swap file
is marked private in swapon() with a new flag SWAP_FLAG_PRIVATE because
only the memory cgroup (and its children) that owns it can use it (in
the case of the children that don't own any swap files, they go up the
hierarchy until someone who has swap file set up is found).

The path of the swap file is set by writing to memory.swapfile. Details
of the API can be found in Documentation/cgroups/memory.txt.

Signed-off-by: Suleiman Souhlal sulei...@google.com
Signed-off-by: Yu Zhao yuz...@google.com
---
 Documentation/cgroups/memory.txt |  15 +++
 include/linux/memcontrol.h   |   2 +
 include/linux/swap.h |  38 +++---
 mm/memcontrol.c  |  76 
 mm/memory.c  |   3 +-
 mm/shmem.c   |   2 +-
 mm/swap_state.c  |   2 +-
 mm/swapfile.c| 241 ++-
 mm/vmscan.c  |   2 +-
 9 files changed, 331 insertions(+), 50 deletions(-)

diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 2622115..48a98ad 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -72,6 +72,7 @@ Brief summary of control files.
  memory.move_charge_at_immigrate # set/show controls of moving charges
  memory.oom_control # set/show oom controls.
  memory.numa_stat   # show the number of memory usage per numa node
+ memory.swapfile   # set/show swap file
 
  memory.kmem.limit_in_bytes  # set/show hard limit for kernel memory
  memory.kmem.usage_in_bytes  # show current kernel memory allocation
@@ -342,6 +343,20 @@ set:
 admin a unified view of memory, and it is also useful for people who just
 want to track kernel memory usage.
 
+2.8 Private swap files
+
+It's possible to configure a cgroup to swap to a particular file by using
+memory.swapfile.
+
+A value of "default" in memory.swapfile indicates that this cgroup should
+use the default, system-wide, swap files. A value of "none" indicates that
+this cgroup should never swap. Other values are interpreted as the path
+to a private swap file.
+
+The swap file has to be created and swapon() has to be done on it with
+SWAP_FLAG_PRIVATE, before it can be used. This flag ensures that the swap
+file is private and does not get used by others.
+
 3. User Interface
 
 0. Configuration
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index abd0113..ec4879b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -155,6 +155,8 @@ static inline bool task_in_memcg_oom(struct task_struct *p)
 }
 
 bool mem_cgroup_oom_synchronize(bool wait);
+int mem_cgroup_get_page_swap_type(struct page *page);
+void mem_cgroup_remove_swapfile(int type);
 
 #ifdef CONFIG_MEMCG_SWAP
 extern int do_swap_account;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 46ba0c6..b6a280e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -23,10 +23,11 @@ struct bio;
 #define SWAP_FLAG_DISCARD  0x1 /* enable discard for swap */
 #define SWAP_FLAG_DISCARD_ONCE 0x2 /* discard swap area at swapon-time */
 #define SWAP_FLAG_DISCARD_PAGES 0x4 /* discard page-clusters after use */
+#define SWAP_FLAG_PRIVATE 0x100 /* set if get_swap_page should skip */
 
 #define SWAP_FLAGS_VALID   (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \
 SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \
-SWAP_FLAG_DISCARD_PAGES)
+SWAP_FLAG_DISCARD_PAGES | SWAP_FLAG_PRIVATE)
 
 static inline int current_is_kswapd(void)
 {
@@ -158,8 +159,14 @@ enum {
SWP_FILE= (1 << 7), /* set after swap_activate success */
SWP_AREA_DISCARD = (1 << 8),/* single-time swap area discards */
SWP_PAGE_DISCARD = (1 << 9),/* freed swap page-cluster discards */
+   SWP_PRIVATE = (1 << 10),/* not for general use */
/* add others here before... */
-   SWP_SCANNING= (1 << 10),/* refcount in scan_swap_map */
+   SWP_SCANNING= (1 << 11),/* refcount in scan_swap_map */
+};
+
+enum {
+   SWAP_TYPE_DEFAULT = -1, /* use default/global/system swap file */
+   SWAP_TYPE_NONE = -2,/* swap is disabled */
 };
 
 #define SWAP_CLUSTER_MAX 32UL
@@ -401,22 +408,19 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
struct vm_area_struct *vma, unsigned long addr);
 
 /* linux/mm/swapfile.c */
-extern atomic_long_t nr_swap_pages;
-extern long total_swap_pages;
-
-/* Swap 50% full? Release swapcache more aggressively.. */
-static inline bool vm_swap_full(void)
-{
-   return atomic_long_read(&nr_swap_pages) * 2 < total_swap_pages;
-}
-
+extern bool vm_swap_full(struct page *page);
+extern