The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at 
https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-123.1.2.vz7.5.9
------>
commit ed523ec92064c7b792fcbfea3a01cf9f1e80dd63
Author: Vladimir Davydov <vdavy...@parallels.com>
Date:   Thu Jun 4 16:58:28 2015 +0400

    sched: Revert "SCHED: rework cputime accounting (v2)"
    
    This reverts commit 6071473d0440fcfd128f3243dfb82d19f6aef668.
    
    The above-mentioned commit dramatically complicates porting of the cpu
    accounting patches from RH6, so revert it. The next patch will fix cpu
    accounting again on top of this revert.
    
    Related to https://jira.sw.ru/browse/PSBM-33642
    
    Signed-off-by: Vladimir Davydov <vdavy...@parallels.com>
    
    Conflicts:
        kernel/sched/core.c
---
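
For reference, a minimal sketch of the percpu statistics lifecycle this
patch gives struct task_group: allocate on group creation, accumulate per
cpu, fold the counters on read, free on destruction. The my_* names below
are hypothetical and the sketch is not part of the patch; alloc_percpu(),
free_percpu(), per_cpu_ptr() and for_each_possible_cpu() are the real
kernel percpu API:

#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/kernel_stat.h>  /* NR_STATS, enum cpu_usage_stat */
#include <linux/string.h>
#include <linux/errno.h>

struct my_stat {
        u64 cpustat[NR_STATS];
};

struct my_group {
        struct my_stat __percpu *cpustat;
};

/* One counter array per possible cpu, as sched_create_group() now does. */
static int my_group_alloc(struct my_group *g)
{
        g->cpustat = alloc_percpu(struct my_stat);
        if (!g->cpustat)
                return -ENOMEM;
        return 0;
}

/* Release the percpu area, as free_sched_group() now does. */
static void my_group_free(struct my_group *g)
{
        free_percpu(g->cpustat);
}

/* Fold the per-cpu counters into one total, as cpu_cgroup_get_stat() does. */
static void my_group_sum(struct my_group *g, struct my_stat *sum)
{
        int cpu, j;

        memset(sum, 0, sizeof(*sum));
        for_each_possible_cpu(cpu) {
                struct my_stat *st = per_cpu_ptr(g->cpustat, cpu);

                for (j = 0; j < NR_STATS; j++)
                        sum->cpustat[j] += st->cpustat[j];
        }
}

In the patch itself, cpu_cgroup_update_stat() is the lazy refresh step in
between: readers such as cpu_cgroup_proc_stat() call it per cpu before
folding the counters.
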
 drivers/iommu/amd_iommu.c   |  4 +-
 include/linux/fairsched.h   |  3 +-
 include/linux/kernel_stat.h |  1 +
 include/linux/sched.h       |  3 --
 kernel/sched/core.c         | 92 +++++++++++++++++++++++++--------------------
 kernel/sched/cputime.c      | 15 --------
 kernel/sched/sched.h        |  1 +
 7 files changed, 57 insertions(+), 62 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index c1eefe2..6dc6594 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4227,7 +4227,7 @@ static int set_affinity(struct irq_data *data, const struct cpumask *mask,
        return 0;
 }
 
-static int amd_iommu_free_irq(int irq)
+static int free_irq(int irq)
 {
        struct irq_2_irte *irte_info;
        struct irq_cfg *cfg;
@@ -4352,7 +4352,7 @@ struct irq_remap_ops amd_iommu_irq_ops = {
        .enable_faulting        = amd_iommu_enable_faulting,
        .setup_ioapic_entry     = setup_ioapic_entry,
        .set_affinity           = set_affinity,
-       .free_irq               = amd_iommu_free_irq,
+       .free_irq               = free_irq,
        .compose_msi_msg        = compose_msi_msg,
        .msi_alloc_irq          = msi_alloc_irq,
        .msi_setup_irq          = msi_setup_irq,
diff --git a/include/linux/fairsched.h b/include/linux/fairsched.h
index 12bbc5b..e242c0d 100644
--- a/include/linux/fairsched.h
+++ b/include/linux/fairsched.h
@@ -18,6 +18,8 @@
 
 #ifdef __KERNEL__
 
+struct kernel_cpustat;
+
 #ifdef CONFIG_VZ_FAIRSCHED
 
 #define FSCHWEIGHT_MAX         ((1 << 16) - 1)
@@ -79,7 +81,6 @@ static inline int fairsched_get_cpu_stat(const char *name, struct kernel_cpustat
 
 #endif /* CONFIG_VZ_FAIRSCHED */
 
-struct kernel_cpustat;
 void cpu_cgroup_get_stat(struct cgroup *cgrp, struct kernel_cpustat *kstat);
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index a63a497..d105ab3 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -6,6 +6,7 @@
 #include <linux/percpu.h>
 #include <linux/cpumask.h>
 #include <linux/interrupt.h>
+#include <linux/sched.h>
 #include <linux/vtime.h>
 #include <asm/irq.h>
 #include <asm/cputime.h>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e62dc2b..f4a5e3d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -53,7 +53,6 @@ struct sched_param {
 #include <linux/uidgid.h>
 #include <linux/gfp.h>
 #include <linux/ve_proto.h>
-#include <linux/kernel_stat.h>
 
 #include <asm/processor.h>
 
@@ -976,8 +975,6 @@ struct sched_avg {
 
 #ifdef CONFIG_SCHEDSTATS
 struct sched_statistics {
-       u64                     cpustat[NR_STATS];
-
        u64                     wait_start;
        u64                     wait_max;
        u64                     wait_count;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0e8c921..50273af 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7394,6 +7394,7 @@ void __init sched_init(void)
 #endif /* CONFIG_CPUMASK_OFFSTACK */
        }
 
+       root_task_group.cpustat = alloc_percpu(struct kernel_cpustat);
        root_task_group.taskstats = alloc_percpu(struct taskstats);
 
 #ifdef CONFIG_SMP
@@ -7694,6 +7695,7 @@ static void free_sched_group(struct task_group *tg)
        free_fair_sched_group(tg);
        free_rt_sched_group(tg);
        autogroup_free(tg);
+       free_percpu(tg->cpustat);
        free_percpu(tg->taskstats);
        kfree(tg);
 }
@@ -7713,6 +7715,10 @@ struct task_group *sched_create_group(struct task_group *parent)
        if (!alloc_rt_sched_group(tg, parent))
                goto err;
 
+       tg->cpustat = alloc_percpu(struct kernel_cpustat);
+       if (!tg->cpustat)
+               goto err;
+
        tg->taskstats = alloc_percpu(struct taskstats);
        if (!tg->taskstats)
                goto err;
@@ -8661,16 +8667,34 @@ static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-static void __task_group_get_cpu_stat(struct task_group *tg, int cpu,
-                                     struct kernel_cpustat *kcpustat)
+static u64 cpu_cgroup_usage_cpu(struct task_group *tg, int i)
+{
+#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SCHEDSTATS)
+       /* root_task_group has no sched entities */
+       if (tg == &root_task_group)
+               return cpu_rq(i)->rq_cpu_time;
+
+       return tg->se[i]->sum_exec_runtime;
+#else
+       return 0;
+#endif
+}
+
+static void cpu_cgroup_update_stat(struct task_group *tg, int i)
 {
 #if defined(CONFIG_SCHEDSTATS) && defined(CONFIG_FAIR_GROUP_SCHED)
-       struct sched_entity *se = tg->se[cpu];
-       u64 now = cpu_clock(cpu);
+       struct sched_entity *se = tg->se[i];
+       struct kernel_cpustat *kcpustat = per_cpu_ptr(tg->cpustat, i);
+       u64 now = cpu_clock(i);
        u64 delta, idle, iowait;
 
+       /* root_task_group has no sched entities */
+       if (tg == &root_task_group)
+               return;
+
        iowait = se->statistics.iowait_sum;
        idle = se->statistics.sum_sleep_runtime;
+       kcpustat->cpustat[CPUTIME_STEAL] = se->statistics.wait_sum;
 
        if (idle > iowait)
                idle -= iowait;
@@ -8691,28 +8715,13 @@ static void __task_group_get_cpu_stat(struct task_group *tg, int cpu,
                        kcpustat->cpustat[CPUTIME_STEAL] += delta;
        }
 
-       kcpustat->cpustat[CPUTIME_USER] = se->statistics.cpustat[CPUTIME_USER];
-       kcpustat->cpustat[CPUTIME_NICE] = se->statistics.cpustat[CPUTIME_NICE];
-       kcpustat->cpustat[CPUTIME_SYSTEM] =
-               se->statistics.cpustat[CPUTIME_SYSTEM];
        kcpustat->cpustat[CPUTIME_IDLE] =
                max(kcpustat->cpustat[CPUTIME_IDLE], idle);
        kcpustat->cpustat[CPUTIME_IOWAIT] =
                max(kcpustat->cpustat[CPUTIME_IOWAIT], iowait);
-       kcpustat->cpustat[CPUTIME_STEAL] = se->statistics.wait_sum;
-       kcpustat->cpustat[CPUTIME_USED] = se->sum_exec_runtime;
-#endif
-}
 
-static void task_group_get_cpu_stat(struct task_group *tg, int cpu,
-                                   struct kernel_cpustat *kcpustat)
-{
-       if (tg == &root_task_group) {
-               memcpy(kcpustat, &kcpustat_cpu(cpu), sizeof(*kcpustat));
-               return;
-       }
-       memset(kcpustat, 0, sizeof(*kcpustat));
-       __task_group_get_cpu_stat(tg, cpu, kcpustat);
+       kcpustat->cpustat[CPUTIME_USED] = cpu_cgroup_usage_cpu(tg, i);
+#endif
 }
 
 int cpu_cgroup_proc_stat(struct cgroup *cgrp, struct cftype *cft,
@@ -8724,7 +8733,7 @@ int cpu_cgroup_proc_stat(struct cgroup *cgrp, struct cftype *cft,
        u64 user, nice, system, idle, iowait, steal;
        struct timespec boottime;
        struct task_group *tg = cgroup_tg(cgrp);
-       struct kernel_cpustat st;
+       struct kernel_cpustat *kcpustat;
        unsigned long tg_nr_running = 0;
        unsigned long tg_nr_iowait = 0;
        unsigned long long tg_nr_switches = 0;
@@ -8736,14 +8745,16 @@ int cpu_cgroup_proc_stat(struct cgroup *cgrp, struct cftype *cft,
        user = nice = system = idle = iowait = steal = 0;
 
        for_each_possible_cpu(i) {
-               task_group_get_cpu_stat(tg, i, &st);
+               kcpustat = per_cpu_ptr(tg->cpustat, i);
 
-               user    += st.cpustat[CPUTIME_USER];
-               nice    += st.cpustat[CPUTIME_NICE];
-               system  += st.cpustat[CPUTIME_SYSTEM];
-               idle    += st.cpustat[CPUTIME_IDLE];
-               iowait  += st.cpustat[CPUTIME_IOWAIT];
-               steal   += st.cpustat[CPUTIME_STEAL];
+               cpu_cgroup_update_stat(tg, i);
+
+               user += kcpustat->cpustat[CPUTIME_USER];
+               nice += kcpustat->cpustat[CPUTIME_NICE];
+               system += kcpustat->cpustat[CPUTIME_SYSTEM];
+               idle += kcpustat->cpustat[CPUTIME_IDLE];
+               iowait += kcpustat->cpustat[CPUTIME_IOWAIT];
+               steal += kcpustat->cpustat[CPUTIME_STEAL];
 
                /* root task group has autogrouping, so this doesn't hold */
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -8770,15 +8781,14 @@ int cpu_cgroup_proc_stat(struct cgroup *cgrp, struct cftype *cft,
                for_each_online_cpu(j) {
                        if (j % nr_ve_vcpus != i)
                                continue;
-
-                       task_group_get_cpu_stat(tg, i, &st);
-
-                       user    += st.cpustat[CPUTIME_USER];
-                       nice    += st.cpustat[CPUTIME_NICE];
-                       system  += st.cpustat[CPUTIME_SYSTEM];
-                       idle    += st.cpustat[CPUTIME_IDLE];
-                       iowait  += st.cpustat[CPUTIME_IOWAIT];
-                       steal   += st.cpustat[CPUTIME_STEAL];
+                       kcpustat = per_cpu_ptr(tg->cpustat, j);
+
+                       user += kcpustat->cpustat[CPUTIME_USER];
+                       nice += kcpustat->cpustat[CPUTIME_NICE];
+                       system += kcpustat->cpustat[CPUTIME_SYSTEM];
+                       idle += kcpustat->cpustat[CPUTIME_IDLE];
+                       iowait += kcpustat->cpustat[CPUTIME_IOWAIT];
+                       steal += kcpustat->cpustat[CPUTIME_STEAL];
                }
                seq_printf(p,
                        "cpu%d %llu %llu %llu %llu %llu 0 0 %llu\n",
@@ -8845,12 +8855,12 @@ void cpu_cgroup_get_stat(struct cgroup *cgrp, struct kernel_cpustat *kstat)
        memset(kstat, 0, sizeof(struct kernel_cpustat));
 
        for_each_possible_cpu(i) {
-               struct kernel_cpustat st;
+               struct kernel_cpustat *st = per_cpu_ptr(tg->cpustat, i);
 
-               task_group_get_cpu_stat(tg, i, &st);
+               cpu_cgroup_update_stat(tg, i);
 
                for (j = 0; j < NR_STATS; j++)
-                       kstat->cpustat[j] += st.cpustat[j];
+                       kstat->cpustat[j] += st->cpustat[j];
        }
 }
 
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 3ad9fa8..ba4bfc0 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -112,19 +112,6 @@ static int irqtime_account_si_update(void)
 
 #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
 
-static inline void update_stats_account_cpu_time(struct task_struct *p,
-                                                int index, u64 tmp)
-{
-#if defined(CONFIG_SCHEDSTATS) && defined(CONFIG_FAIR_GROUP_SCHED)
-       struct sched_entity *se = &p->se;
-
-       do {
-               se->statistics.cpustat[index] += tmp;
-               se = se->parent;
-       } while (se);
-#endif
-}
-
 static inline void task_group_account_field(struct task_struct *p, int index,
                                            u64 tmp)
 {
@@ -136,8 +123,6 @@ static inline void task_group_account_field(struct task_struct *p, int index,
         */
        __get_cpu_var(kernel_cpustat).cpustat[index] += tmp;
 
-       update_stats_account_cpu_time(p, index, tmp);
-
        cpuacct_account_field(p, index, tmp);
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e4f92a5..e0c03d8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -167,6 +167,7 @@ struct task_group {
        struct autogroup *autogroup;
 #endif
 
+       struct kernel_cpustat __percpu *cpustat;
        struct taskstats __percpu *taskstats;
        unsigned long avenrun[3];       /* loadavg data */
        struct timespec start_time;