[PATCH 8/8] mm: memcontrol: hook up vmpressure to socket pressure

2015-11-04 Thread Johannes Weiner
Let the networking stack know when a memcg is under reclaim pressure
so that it can clamp its transmit windows accordingly.

Whenever the reclaim efficiency of a cgroup's LRU lists drops low
enough for a MEDIUM or HIGH vmpressure event to occur, assert a
pressure state in the socket and tcp memory code that tells it to curb
consumption growth from sockets associated with said control group.

vmpressure events are naturally edge triggered, so for hysteresis
assert socket pressure for a second to allow for subsequent vmpressure
events to occur before letting the socket code return to normal.

This will likely need fine-tuning for a wider variety of workloads, but
for now stick to the vmpressure presets and keep hysteresis simple.

Signed-off-by: Johannes Weiner 
---
 include/linux/memcontrol.h | 27 +--
 mm/memcontrol.c            | 15 +--
 mm/vmpressure.c            | 25 -
 3 files changed, 46 insertions(+), 21 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 7adabb7..d45379a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -247,6 +247,7 @@ struct mem_cgroup {
 
 #ifdef CONFIG_INET
struct work_struct socket_work;
+   unsigned long socket_pressure;
 #endif
 
/* List of events which userspace want to receive */
@@ -292,18 +293,34 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
 
 bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
-struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
 
 static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
return css ? container_of(css, struct mem_cgroup, css) : NULL;
 }
 
+#define mem_cgroup_from_counter(counter, member)   \
+   container_of(counter, struct mem_cgroup, member)
+
 struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
   struct mem_cgroup *,
   struct mem_cgroup_reclaim_cookie *);
 void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
 
+/**
+ * parent_mem_cgroup - find the accounting parent of a memcg
+ * @memcg: memcg whose parent to find
+ *
+ * Returns the parent memcg, or NULL if this is the root or the memory
+ * controller is in legacy no-hierarchy mode.
+ */
+static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
+{
+   if (!memcg->memory.parent)
+   return NULL;
+   return mem_cgroup_from_counter(memcg->memory.parent, memory);
+}
+
 static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg,
  struct mem_cgroup *root)
 {
@@ -695,7 +712,13 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
-   return memcg->skmem_breached;
+   if (memcg->skmem_breached)
+   return true;
+   do {
+   if (time_before(jiffies, memcg->socket_pressure))
+   return true;
+   } while ((memcg = parent_mem_cgroup(memcg)));
+   return false;
 }
 #else
 static inline bool mem_cgroup_do_sockets(void)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2994c9d..e10637f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1084,9 +1084,6 @@ bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg)
return ret;
 }
 
-#define mem_cgroup_from_counter(counter, member)   \
-   container_of(counter, struct mem_cgroup, member)
-
 /**
  * mem_cgroup_margin - calculate chargeable space of a memory cgroup
  * @memcg: the memory cgroup
@@ -4126,17 +4123,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
kfree(memcg);
 }
 
-/*
- * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
- */
-struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
-{
-   if (!memcg->memory.parent)
-   return NULL;
-   return mem_cgroup_from_counter(memcg->memory.parent, memory);
-}
-EXPORT_SYMBOL(parent_mem_cgroup);
-
 static void socket_work_func(struct work_struct *work);
 
 static struct cgroup_subsys_state * __ref
@@ -4181,6 +4167,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 #endif
 #ifdef CONFIG_INET
INIT_WORK(&memcg->socket_work, socket_work_func);
+   memcg->socket_pressure = jiffies;
 #endif
return &memcg->css;
 
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 4c25e62..07e8440 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -137,14 +137,11 @@ struct vmpressure_event {
 };
 
 static bool vmpressure_event(struct vmpressure *vmpr,
-unsigned long scanned, unsigned long reclaimed)
+ 

Re: [PATCH 8/8] mm: memcontrol: hook up vmpressure to socket pressure

2015-10-22 Thread Vladimir Davydov
On Thu, Oct 22, 2015 at 12:21:36AM -0400, Johannes Weiner wrote:
...
> @@ -185,8 +183,29 @@ static void vmpressure_work_fn(struct work_struct *work)
>   vmpr->reclaimed = 0;
>   spin_unlock(&vmpr->sr_lock);
>  
> + level = vmpressure_calc_level(scanned, reclaimed);
> +
> + if (level > VMPRESSURE_LOW) {

So we start socket_pressure at MEDIUM. Why not at LOW or CRITICAL?

> + struct mem_cgroup *memcg;
> + /*
> +  * Let the socket buffer allocator know that we are
> +  * having trouble reclaiming LRU pages.
> +  *
> +  * For hysteresis, keep the pressure state asserted
> +  * for a second in which subsequent pressure events
> +  * can occur.
> +  *
> +  * XXX: is vmpressure a global feature or part of
> +  * memcg? There shouldn't be anything memcg-specific
> +  * about exporting reclaim success ratios from the VM.
> +  */
> + memcg = container_of(vmpr, struct mem_cgroup, vmpressure);
> + if (memcg != root_mem_cgroup)
> + memcg->socket_pressure = jiffies + HZ;

Why 1 second?

Thanks,
Vladimir

> + }
> +
>   do {
> - if (vmpressure_event(vmpr, scanned, reclaimed))
> + if (vmpressure_event(vmpr, level))
>   break;
>   /*
>* If not handled, propagate the event upward into the
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 8/8] mm: memcontrol: hook up vmpressure to socket pressure

2015-10-21 Thread Johannes Weiner
Let the networking stack know when a memcg is under reclaim pressure,
so it can shrink its transmit windows accordingly.

Whenever the reclaim efficiency of a memcg's LRU lists drops low
enough for a MEDIUM or HIGH vmpressure event to occur, assert a
pressure state in the socket and tcp memory code that tells it to
reduce memory usage in sockets associated with said memory cgroup.

vmpressure events are edge triggered, so for hysteresis assert socket
pressure for a second to allow for subsequent vmpressure events to
occur before letting the socket code return to normal.

Signed-off-by: Johannes Weiner 
---
 include/linux/memcontrol.h |  9 +
 include/net/sock.h |  4 
 include/net/tcp.h  |  4 
 mm/memcontrol.c|  1 +
 mm/vmpressure.c| 29 -
 5 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d66ae18..b9990f7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -246,6 +246,7 @@ struct mem_cgroup {
 
 #ifdef CONFIG_INET
struct work_struct socket_work;
+   unsigned long socket_pressure;
 #endif
 
/* List of events which userspace want to receive */
@@ -696,6 +697,10 @@ void sock_update_memcg(struct sock *sk);
 void sock_release_memcg(struct sock *sk);
 bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
+static inline bool mem_cgroup_socket_pressure(struct mem_cgroup *memcg)
+{
+   return time_before(jiffies, memcg->socket_pressure);
+}
 #else
 static inline bool mem_cgroup_do_sockets(void)
 {
@@ -716,6 +721,10 @@ static inline void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg,
 unsigned int nr_pages)
 {
 }
+static inline bool mem_cgroup_socket_pressure(struct mem_cgroup *memcg)
+{
+   return false;
+}
 #endif /* CONFIG_INET */
 
 #ifdef CONFIG_MEMCG_KMEM
diff --git a/include/net/sock.h b/include/net/sock.h
index 67795fc..22bfb9c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1087,6 +1087,10 @@ static inline bool sk_has_memory_pressure(const struct sock *sk)
 
 static inline bool sk_under_memory_pressure(const struct sock *sk)
 {
+   if (mem_cgroup_do_sockets() && sk->sk_memcg &&
+   mem_cgroup_socket_pressure(sk->sk_memcg))
+   return true;
+
if (!sk->sk_prot->memory_pressure)
return false;
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 77b6c7e..c7d342c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -291,6 +291,10 @@ extern int tcp_memory_pressure;
 /* optimized version of sk_under_memory_pressure() for TCP sockets */
 static inline bool tcp_under_memory_pressure(const struct sock *sk)
 {
+   if (mem_cgroup_do_sockets() && sk->sk_memcg &&
+   mem_cgroup_socket_pressure(sk->sk_memcg))
+   return true;
+
return tcp_memory_pressure;
 }
 /*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cb1d6aa..2e09def 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4178,6 +4178,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 #endif
 #ifdef CONFIG_INET
INIT_WORK(&memcg->socket_work, socket_work_func);
+   memcg->socket_pressure = jiffies;
 #endif
return &memcg->css;
 
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 4c25e62..f64c0e1 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -137,14 +137,11 @@ struct vmpressure_event {
 };
 
 static bool vmpressure_event(struct vmpressure *vmpr,
-unsigned long scanned, unsigned long reclaimed)
+enum vmpressure_levels level)
 {
struct vmpressure_event *ev;
-   enum vmpressure_levels level;
bool signalled = false;
 
-   level = vmpressure_calc_level(scanned, reclaimed);
-
mutex_lock(&vmpr->events_lock);
 
list_for_each_entry(ev, &vmpr->events, node) {
@@ -162,6 +159,7 @@ static bool vmpressure_event(struct vmpressure *vmpr,
 static void vmpressure_work_fn(struct work_struct *work)
 {
struct vmpressure *vmpr = work_to_vmpressure(work);
+   enum vmpressure_levels level;
unsigned long scanned;
unsigned long reclaimed;
 
@@ -185,8 +183,29 @@ static void vmpressure_work_fn(struct work_struct *work)
vmpr->reclaimed = 0;
spin_unlock(&vmpr->sr_lock);
 
+   level = vmpressure_calc_level(scanned, reclaimed);
+
+   if (level > VMPRESSURE_LOW) {
+   struct mem_cgroup *memcg;
+   /*
+* Let the socket buffer allocator know that we are
+* having trouble reclaiming LRU pages.
+*
+* For hysteresis, keep the pressure state asserted
+* for a second in which subsequent pressure events
+* can occur.
+