The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-123.1.2.vz7.5.5 ------> commit 033e46f2a9faff27e286af70c8ecfffdb25d4dc0 Author: Vladimir Davydov <vdavy...@parallels.com> Date: Fri May 22 18:41:32 2015 +0400
bc/oom: zap oom kill related stuff Per-container OOM killer logic has to be implemented in the scope of the memory cgroup. Remove the ub-related code; it does not work properly anyway. khorenko@: task for per-CT OOM implementation: https://jira.sw.ru/browse/PSBM-33732 Signed-off-by: Vladimir Davydov <vdavy...@parallels.com> --- arch/x86/mm/fault.c | 2 - drivers/tty/sysrq.c | 3 - fs/proc/base.c | 2 - include/bc/beancounter.h | 8 -- include/bc/oom_kill.h | 19 ---- include/bc/vmpages.h | 2 - include/linux/mm_types.h | 2 - include/linux/oom.h | 13 --- kernel/bc/beancounter.c | 4 - kernel/bc/oom_kill.c | 289 ----------------------------------------------- kernel/bc/proc.c | 2 - kernel/bc/statd.c | 3 - kernel/bc/vm_pages.c | 46 -------- kernel/exit.c | 1 - kernel/fork.c | 5 - mm/page_alloc.c | 4 - 16 files changed, 405 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ef84bec..203dd90 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -20,8 +20,6 @@ #include <asm/kmemcheck.h> /* kmemcheck_*(), ... 
*/ #include <asm/fixmap.h> /* VSYSCALL_START */ -#include <bc/oom_kill.h> - #define CREATE_TRACE_POINTS #include <asm/trace/exceptions.h> diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 5a2921f..069928f 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -46,7 +46,6 @@ #include <linux/jiffies.h> #include <linux/ve.h> -#include <bc/oom_kill.h> #include <bc/vmpages.h> #include <asm/ptrace.h> @@ -363,8 +362,6 @@ static struct sysrq_key_op sysrq_term_op = { static void moom_callback(struct work_struct *ignored) { - //ub_oom_start(&global_oom_ctrl); - //global_oom_ctrl.kill_counter = 0; out_of_memory(node_zonelist(first_online_node, GFP_KERNEL), GFP_KERNEL, 0, NULL, true); } diff --git a/fs/proc/base.c b/fs/proc/base.c index 79ee3c8..caa530c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -95,8 +95,6 @@ #include "internal.h" #include "fd.h" -#include <bc/oom_kill.h> - /* NOTE: * Implementing inode permission operations in /proc is almost * certainly an error. Permission checks need to happen during diff --git a/include/bc/beancounter.h b/include/bc/beancounter.h index 4337e13..202a5f5 100644 --- a/include/bc/beancounter.h +++ b/include/bc/beancounter.h @@ -19,7 +19,6 @@ #include <linux/threads.h> #include <linux/percpu.h> #include <linux/percpu_counter.h> -#include <linux/oom.h> #include <linux/ratelimit.h> #include <linux/cgroup.h> #include <bc/debug.h> @@ -155,17 +154,14 @@ struct user_beancounter { struct ubparm *ub_store; struct ub_percpu_struct *ub_percpu; - struct oom_control oom_ctrl; }; enum ub_flags { UB_DIRTY_EXCEEDED, - UB_OOM_NOPROC, UB_OOM_MANUAL_SCORE_ADJ, }; extern int ub_count; -extern struct oom_control global_oom_ctrl; enum ub_severity { UB_HARD, UB_SOFT, UB_FORCE }; @@ -321,10 +317,6 @@ extern void __uncharge_beancounter_locked(struct user_beancounter *ub, extern void uncharge_warn(struct user_beancounter *ub, const char *resource, unsigned long val, unsigned long held); -extern long ub_oomguarpages_left(struct 
user_beancounter *ub); -extern void ub_update_resources_locked(struct user_beancounter *ub); -extern void ub_update_resources(struct user_beancounter *ub); - extern int ub_update_mem_cgroup_limits(struct user_beancounter *ub); extern void ub_get_mem_cgroup_parms(struct user_beancounter *ub, diff --git a/include/bc/oom_kill.h b/include/bc/oom_kill.h deleted file mode 100644 index bbfc3a3..0000000 --- a/include/bc/oom_kill.h +++ /dev/null @@ -1,19 +0,0 @@ -#include <bc/decl.h> -#include <bc/task.h> -#include <bc/beancounter.h> - -UB_DECLARE_FUNC(int, ub_oom_lock(struct oom_control *oom_ctrl)) -UB_DECLARE_FUNC(struct user_beancounter *, ub_oom_select_worst(void)) -UB_DECLARE_VOID_FUNC(ub_oom_unlock(struct oom_control *oom_ctrl)) -static inline void ub_oom_mm_dead(struct mm_struct *mm) { } -UB_DECLARE_FUNC(int, ub_oom_task_skip(struct user_beancounter *ub, - struct task_struct *tsk)) -static inline unsigned long ub_oom_total_pages(struct user_beancounter *ub) { return 0; } -static inline int out_of_memory_in_ub(struct user_beancounter *ub, - gfp_t gfp_mask) { return 0; } -UB_DECLARE_VOID_FUNC(ub_oom_start(struct oom_control *oom_ctrl)) -UB_DECLARE_VOID_FUNC(ub_oom_mark_mm(struct mm_struct *mm, - struct oom_control *oom_ctrl)) - -#ifdef CONFIG_BEANCOUNTERS -#endif diff --git a/include/bc/vmpages.h b/include/bc/vmpages.h index f7a36ba..65d9de4 100644 --- a/include/bc/vmpages.h +++ b/include/bc/vmpages.h @@ -47,8 +47,6 @@ UB_DECLARE_FUNC(int, ub_lockedshm_charge(struct shmem_inode_info *shi, UB_DECLARE_VOID_FUNC(ub_lockedshm_uncharge(struct shmem_inode_info *shi, unsigned long size)) -extern void __ub_update_oomguarpages(struct user_beancounter *ub); - static inline int ub_swap_full(struct user_beancounter *ub) { return (ub->ub_parms[UB_SWAPPAGES].held * 2 > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9973661..5ddfa80 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -414,8 +414,6 @@ struct mm_struct { unsigned long flags; 
/* Must use atomic bitops to access the bits */ unsigned int vps_dumpable:2; - unsigned int global_oom:1; - unsigned int ub_oom:1; #ifdef CONFIG_BEANCOUNTERS struct user_beancounter *mm_ub; diff --git a/include/linux/oom.h b/include/linux/oom.h index 0dbbe47..c13af3f 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -88,19 +88,6 @@ static inline void oom_killer_enable(void) extern struct task_struct *find_lock_task_mm(struct task_struct *p); -struct oom_control { - int generation; - int kill_counter; - unsigned long last_kill; - int oom_rage; - spinlock_t lock; - wait_queue_head_t wq; -}; - -extern struct oom_control global_oom_ctrl; - -static inline void init_oom_control(struct oom_control *oom_ctrl) { } - void oom_report_invocation(char *type, struct user_beancounter *ub, gfp_t gfp_mask, int order); diff --git a/kernel/bc/beancounter.c b/kernel/bc/beancounter.c index 5cc0688..88faa1c 100644 --- a/kernel/bc/beancounter.c +++ b/kernel/bc/beancounter.c @@ -450,7 +450,6 @@ static inline int bc_verify_held(struct user_beancounter *ub) ub_stat_mod(ub, dirty_pages, __ub_percpu_sum(ub, dirty_pages)); ub_stat_mod(ub, writeback_pages, __ub_percpu_sum(ub, writeback_pages)); uncharge_beancounter_precharge(ub); - ub_update_resources_locked(ub); clean = 1; for (i = 0; i < UB_RESOURCES; i++) @@ -955,7 +954,6 @@ static void init_beancounter_struct(struct user_beancounter *ub) spin_lock_init(&ub->ub_lock); INIT_LIST_HEAD(&ub->ub_tcp_sk_list); INIT_LIST_HEAD(&ub->ub_other_sk_list); - init_oom_control(&ub->oom_ctrl); spin_lock_init(&ub->rl_lock); ub->rl_wall.tv64 = LLONG_MIN; } @@ -1133,8 +1131,6 @@ void __init ub_init_late(void) init_beancounter_syslimits(&default_beancounter); #endif init_beancounter_struct(&default_beancounter); - - init_oom_control(&global_oom_ctrl); } int __init ub_init_cgroup(void) diff --git a/kernel/bc/oom_kill.c b/kernel/bc/oom_kill.c deleted file mode 100644 index bd213df..0000000 --- a/kernel/bc/oom_kill.c +++ /dev/null @@ -1,289 +0,0 @@ 
-#include <linux/wait.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/cpuset.h> -#include <linux/module.h> -#include <linux/oom.h> - -#include <bc/beancounter.h> -#include <bc/oom_kill.h> -#include <bc/vmpages.h> - -#define UB_OOM_TIMEOUT (5 * HZ) - -void ub_oom_start(struct oom_control *oom_ctrl) -{ - current->task_bc.oom_generation = oom_ctrl->generation; -} - -static inline const char *oom_ctrl_id(struct oom_control *ctrl) -{ - return (ctrl == &global_oom_ctrl ? "-1" : - container_of(ctrl, struct user_beancounter, - oom_ctrl)->ub_name); -} - -static void __ub_release_oom_control(struct oom_control *oom_ctrl, char *why) -{ - printk("<<< %s oom generation %d ends (%s)\n", - oom_ctrl_id(oom_ctrl), oom_ctrl->generation, why); - oom_ctrl->kill_counter = 0; - oom_ctrl->generation++; - - /* if there is time to sleep in ub_oom_lock -> sleep will continue */ - wake_up_all(&oom_ctrl->wq); -} - -static void ub_release_oom_control(struct oom_control *oom_ctrl) -{ - spin_lock(&oom_ctrl->lock); - __ub_release_oom_control(oom_ctrl, "task died"); - spin_unlock(&oom_ctrl->lock); -} - -/* - * Must be called under task_lock() held - */ -void ub_oom_mark_mm(struct mm_struct *mm, struct oom_control *oom_ctrl) -{ - mm_ub(mm)->ub_parms[UB_OOMGUARPAGES].failcnt++; - - if (oom_ctrl == &global_oom_ctrl) - mm->global_oom = 1; - else if (oom_ctrl == &mm->mm_ub->oom_ctrl) - mm->ub_oom = 1; - else { - /* - * Task can be killed when using either global oom ctl - * or by mm->mm_ub one. In other case we must release ctl now. - * When this task will die it'll have to decide with ctl - * to use lokking at this flag and we have to sure it - * will use the proper one. 
- */ - __ub_release_oom_control(oom_ctrl, "mark bug"); - WARN_ON(1); - } -} - -static inline int ub_oom_completed(struct oom_control *oom_ctrl) -{ - if (test_thread_flag(TIF_MEMDIE)) - /* we were oom killed - just die */ - return 1; - if (current->task_bc.oom_generation != oom_ctrl->generation) - /* some task was succesfully killed */ - return 1; - return 0; -} - -static void ub_clear_oom(void) -{ - struct user_beancounter *ub; - - rcu_read_lock(); - for_each_beancounter(ub) - clear_bit(UB_OOM_NOPROC, &ub->ub_flags); - rcu_read_unlock(); -} - -int ub_oom_lock(struct oom_control *oom_ctrl) -{ - int timeout; - DEFINE_WAIT(oom_w); - - if (oom_ctrl != &global_oom_ctrl && global_oom_ctrl.kill_counter) { - /* - * Check if global OOM killeris on the way. If so - - * let the senior handle the situation. - */ - wait_event_killable(global_oom_ctrl.wq, - global_oom_ctrl.kill_counter == 0); - return -EAGAIN; - } - - spin_lock(&oom_ctrl->lock); - if (!oom_ctrl->kill_counter && !ub_oom_completed(oom_ctrl)) - goto out_do_oom; - - timeout = UB_OOM_TIMEOUT; - while (1) { - if (ub_oom_completed(oom_ctrl)) { - spin_unlock(&oom_ctrl->lock); - /* - * We raced with some other OOM killer and need - * to update generation to be sure, that we can - * call OOM killer on next loop iteration. - */ - ub_oom_start(oom_ctrl); - return -EAGAIN; - } - - if (timeout == 0) { - /* - * Time is up, let's kill somebody else but - * release the oom ctl since the stuck task - * wasn't able to do it. 
- */ - __ub_release_oom_control(oom_ctrl, "oom tmo"); - break; - } - - __set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&oom_ctrl->wq, &oom_w); - spin_unlock(&oom_ctrl->lock); - - timeout = schedule_timeout(timeout); - - spin_lock(&oom_ctrl->lock); - remove_wait_queue(&oom_ctrl->wq, &oom_w); - - } - -out_do_oom: - ub_clear_oom(); - printk(">>> %s oom generation %d starts\n", - oom_ctrl_id(oom_ctrl), oom_ctrl->generation); - return 0; -} - -static inline long ub_current_overdraft(struct user_beancounter *ub) -{ - return ((ub->ub_parms[UB_KMEMSIZE].held - + ub->ub_parms[UB_TCPSNDBUF].held - + ub->ub_parms[UB_TCPRCVBUF].held - + ub->ub_parms[UB_OTHERSOCKBUF].held - + ub->ub_parms[UB_DGRAMRCVBUF].held) - >> PAGE_SHIFT) - ub_oomguarpages_left(ub); -} - -int ub_oom_task_skip(struct user_beancounter *ub, struct task_struct *tsk) -{ - struct user_beancounter *mm_ub; - - if (ub == NULL) - return 0; - - task_lock(tsk); - if (tsk->mm == NULL) - mm_ub = NULL; - else - mm_ub = tsk->mm->mm_ub; - - task_unlock(tsk); - - return mm_ub != ub; -} - -struct user_beancounter *ub_oom_select_worst(void) -{ - struct user_beancounter *ub, *walkp; - long ub_maxover; - - ub_maxover = 0; - ub = NULL; - - rcu_read_lock(); - for_each_beancounter (walkp) { - long ub_overdraft; - - if (test_bit(UB_OOM_NOPROC, &walkp->ub_flags)) - continue; - - ub_overdraft = ub_current_overdraft(walkp); - if (ub_overdraft > ub_maxover && get_beancounter_rcu(walkp)) { - put_beancounter(ub); - ub = walkp; - ub_maxover = ub_overdraft; - } - } - - if (ub) { - set_bit(UB_OOM_NOPROC, &ub->ub_flags); - printk(KERN_INFO "OOM selected worst BC %s (overdraft %lu):\n", - ub->ub_name, ub_maxover); - __show_ub_mem(ub); - } - rcu_read_unlock(); - - return ub; -} - -void ub_oom_unlock(struct oom_control *oom_ctrl) -{ - spin_unlock(&oom_ctrl->lock); -} - -void ub_oom_mm_dead(struct mm_struct *mm) -{ - printk("OOM killed process %s (pid=%d, ve=%s) exited, " - "free=%lu.\n", - current->comm, current->pid, - 
task_ve_name(current), - nr_free_pages()); - - if (mm->global_oom) { - if (printk_ratelimit()) - show_mem(SHOW_MEM_FILTER_NODES); - ub_release_oom_control(&global_oom_ctrl); - } - - if (mm->ub_oom) { - struct user_beancounter *ub = mm_ub(mm); - - if (__ratelimit(&ub->ub_ratelimit)) - show_ub_mem(ub); - ub_release_oom_control(&ub->oom_ctrl); - } -} - -unsigned long ub_oom_total_pages(struct user_beancounter *ub) -{ - return min(totalram_pages, ub->ub_parms[UB_PHYSPAGES].limit) + - min(total_swap_pages, ub->ub_parms[UB_SWAPPAGES].limit); -} - -int out_of_memory_in_ub(struct user_beancounter *ub, gfp_t gfp_mask) -{ - struct task_struct *p; - int res = 0; - unsigned long ub_mem_pages; - int points; - - if (ub_oom_lock(&ub->oom_ctrl)) - goto out; - - oom_report_invocation("loc", ub, gfp_mask, 0); - ub_mem_pages = ub_oom_total_pages(ub); - read_lock(&tasklist_lock); - - do { - p = select_bad_process(&points, ub_mem_pages, ub, NULL, NULL); - if (PTR_ERR(p) == -1UL || !p) - break; - } while (oom_kill_process(p, gfp_mask, 0, points, ub_mem_pages, - ub, NULL, NULL, "Out of memory in UB")); - - read_unlock(&tasklist_lock); - ub_oom_unlock(&ub->oom_ctrl); - - if (!p) - res = -ENOMEM; -out: - /* - * Give "p" a good chance of killing itself before we - * retry to allocate memory unless "p" is current - */ - if (!test_thread_flag(TIF_MEMDIE)) - schedule_timeout_uninterruptible(1); - - return res; -} - -struct oom_control global_oom_ctrl; - -void init_oom_control(struct oom_control *oom_ctrl) -{ - spin_lock_init(&oom_ctrl->lock); - init_waitqueue_head(&oom_ctrl->wq); -} diff --git a/kernel/bc/proc.c b/kernel/bc/proc.c index 3d5bf1c..46ea074 100644 --- a/kernel/bc/proc.c +++ b/kernel/bc/proc.c @@ -85,7 +85,6 @@ static void __show_resources(struct seq_file *f, struct user_beancounter *ub, { int i, precharge[UB_RESOURCES]; - ub_update_resources(ub); ub_precharge_snapshot(ub, precharge); for (i = 0; i < UB_RESOURCES_COMPAT; i++) @@ -301,7 +300,6 @@ static int ub_show(struct seq_file 
*f, void *v) int i, precharge[UB_RESOURCES]; struct user_beancounter *ub = v; - ub_update_resources(ub); ub_precharge_snapshot(ub, precharge); for (i = 0; i < UB_RESOURCES_COMPAT; i++) diff --git a/kernel/bc/statd.c b/kernel/bc/statd.c index 25aab55..e0eac10 100644 --- a/kernel/bc/statd.c +++ b/kernel/bc/statd.c @@ -232,8 +232,6 @@ static int ubstat_get_stat(struct user_beancounter *ub, long cmd, if (retval) goto out; - ub_update_resources(ub); - spin_lock(&ubs_notify_lock); switch (UBSTAT_CMD(cmd)) { case UBSTAT_READ_ONE: @@ -384,7 +382,6 @@ static void ubstat_save_onestat(struct user_beancounter *ub) /* called with local irq disabled */ spin_lock(&ub->ub_lock); - ub_update_resources_locked(ub); for (resource = 0; resource < UB_RESOURCES; resource++) { memcpy(&ub->ub_store[resource], &ub->ub_parms[resource], sizeof(struct ubparm)); diff --git a/kernel/bc/vm_pages.c b/kernel/bc/vm_pages.c index b3d0dd0..bc1a1d7 100644 --- a/kernel/bc/vm_pages.c +++ b/kernel/bc/vm_pages.c @@ -23,52 +23,6 @@ #include <bc/beancounter.h> #include <bc/vmpages.h> #include <bc/proc.h> -#include <bc/oom_kill.h> - -void __ub_update_oomguarpages(struct user_beancounter *ub) -{ - ub->ub_parms[UB_OOMGUARPAGES].held = - ub->ub_parms[UB_PRIVVMPAGES].held + - ub->ub_parms[UB_LOCKEDPAGES].held + - ub->ub_parms[UB_PHYSPAGES].held + - ub->ub_parms[UB_SWAPPAGES].held; - - ub_adjust_maxheld(ub, UB_OOMGUARPAGES); -} - -long ub_oomguarpages_left(struct user_beancounter *ub) -{ - unsigned long flags; - long left; - int precharge[UB_RESOURCES]; - - spin_lock_irqsave(&ub->ub_lock, flags); - __ub_update_oomguarpages(ub); - left = ub->ub_parms[UB_OOMGUARPAGES].barrier - - ub->ub_parms[UB_OOMGUARPAGES].held; - spin_unlock_irqrestore(&ub->ub_lock, flags); - - ub_precharge_snapshot(ub, precharge); - left += precharge[UB_OOMGUARPAGES]; - - return left; -} - -void ub_update_resources_locked(struct user_beancounter *ub) -{ - __ub_update_oomguarpages(ub); -} -EXPORT_SYMBOL(ub_update_resources_locked); - -void 
ub_update_resources(struct user_beancounter *ub) -{ - unsigned long flags; - - spin_lock_irqsave(&ub->ub_lock, flags); - ub_update_resources_locked(ub); - spin_unlock_irqrestore(&ub->ub_lock, flags); -} -EXPORT_SYMBOL(ub_update_resources); int ub_memory_charge(struct mm_struct *mm, unsigned long size, unsigned vm_flags, struct file *vm_file, int sv) diff --git a/kernel/exit.c b/kernel/exit.c index 1c65d95..bbfd54d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -56,7 +56,6 @@ #include <linux/shm.h> #include <bc/misc.h> -#include <bc/oom_kill.h> #include <asm/uaccess.h> #include <asm/unistd.h> diff --git a/kernel/fork.c b/kernel/fork.c index 95950e7..bf9aab4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -82,7 +82,6 @@ #include <asm/tlbflush.h> #include <bc/misc.h> -#include <bc/oom_kill.h> #include <bc/vmpages.h> #include <trace/events/sched.h> @@ -675,8 +674,6 @@ void mmput(struct mm_struct *mm) } if (mm->binfmt) module_put(mm->binfmt->module); - if (mm->global_oom || mm->ub_oom) - ub_oom_mm_dead(mm); put_mm_ub(mm); mmdrop(mm); } @@ -873,8 +870,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk) goto fail_nomem; memcpy(mm, oldmm, sizeof(*mm)); - mm->global_oom = 0; - mm->ub_oom = 0; mm_init_cpumask(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS diff --git a/mm/page_alloc.c b/mm/page_alloc.c index cd26308..627677c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -62,8 +62,6 @@ #include <linux/sched/rt.h> #include <linux/ve.h> -#include <bc/oom_kill.h> - #include <asm/tlbflush.h> #include <asm/div64.h> #include "internal.h" @@ -2267,8 +2265,6 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, struct reclaim_state reclaim_state; int progress; - //ub_oom_start(&global_oom_ctrl); - cond_resched(); /* We now go into synchronous reclaim */ _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel