On 2025/8/22 9:37, Tiffany Yang wrote:
> There isn't yet a clear way to identify a set of "lost" time that
> everyone (or at least a wider group of users) cares about. However,
> users can perform some delay accounting by iterating over components of
> interest. This patch allows cgroup v2 freezing time to be one of those
> components.
> 
> Track the cumulative time that each v2 cgroup spends freezing and expose
> it to userland via a new local stat file in cgroupfs. Thank you to
> Michal, who provided the ASCII art in the updated documentation.
> 
> To access this value:
>   $ mkdir /sys/fs/cgroup/test
>   $ cat /sys/fs/cgroup/test/cgroup.stat.local
>   freeze_time_total 0
> 
> Ensure consistent freeze time reads with freeze_seq, a per-cgroup
> sequence counter. Writes are serialized using the css_set_lock.
> 
> Signed-off-by: Tiffany Yang <[email protected]>
> Cc: Tejun Heo <[email protected]>
> Cc: Michal Koutný <[email protected]>
> ---
> v3 -> v4:
> * Replace "freeze_time_total" with "frozen" and expose stats via
>   cgroup.stat.local, as recommended by Tejun.
> * Use the same timestamp when freezing/unfreezing a cgroup as its
>   descendants, as suggested by Michal.
> 
> v2 -> v3:
> * Use seqcount along with css_set_lock to guard freeze time accesses, as
>   suggested by Michal.
> 
> v1 -> v2:
> * Track per-cgroup freezing time instead of per-task frozen time, as
>   suggested by Tejun.
> ---
>  Documentation/admin-guide/cgroup-v2.rst | 18 ++++++++++++++++
>  include/linux/cgroup-defs.h             | 17 +++++++++++++++
>  kernel/cgroup/cgroup.c                  | 28 +++++++++++++++++++++++++
>  kernel/cgroup/freezer.c                 | 16 ++++++++++----
>  4 files changed, 75 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/admin-guide/cgroup-v2.rst 
> b/Documentation/admin-guide/cgroup-v2.rst
> index 51c0bc4c2dc5..a1e3d431974c 100644
> --- a/Documentation/admin-guide/cgroup-v2.rst
> +++ b/Documentation/admin-guide/cgroup-v2.rst
> @@ -1001,6 +1001,24 @@ All cgroup core files are prefixed with "cgroup."
>               Total number of dying cgroup subsystems (e.g. memory
>               cgroup) at and beneath the current cgroup.
>  
> +  cgroup.stat.local
> +     A read-only flat-keyed file which exists in non-root cgroups.
> +     The following entry is defined:
> +
> +       frozen_usec
> +             Cumulative time that this cgroup has spent between freezing and
> +             thawing, regardless of whether by self or ancestor groups.
> +             NB: (not) reaching "frozen" state is not accounted here.
> +
> +             Using the following ASCII representation of a cgroup's freezer
> +             state, ::
> +
> +                            1    _____
> +                     frozen 0 __/     \__
> +                               ab    cd
> +
> +             the duration being measured is the span between a and c.
> +
>    cgroup.freeze
>       A read-write single value file which exists on non-root cgroups.
>       Allowed values are "0" and "1". The default is "0".
> diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
> index 6b93a64115fe..539c64eeef38 100644
> --- a/include/linux/cgroup-defs.h
> +++ b/include/linux/cgroup-defs.h
> @@ -433,6 +433,23 @@ struct cgroup_freezer_state {
>        * frozen, SIGSTOPped, and PTRACEd.
>        */
>       int nr_frozen_tasks;
> +
> +     /* Freeze time data consistency protection */
> +     seqcount_t freeze_seq;
> +
> +     /*
> +      * Most recent time the cgroup was requested to freeze.
> +      * Accesses guarded by freeze_seq counter. Writes serialized
> +      * by css_set_lock.
> +      */
> +     u64 freeze_start_nsec;
> +
> +     /*
> +      * Total duration the cgroup has spent freezing.
> +      * Accesses guarded by freeze_seq counter. Writes serialized
> +      * by css_set_lock.
> +      */
> +     u64 frozen_nsec;
>  };
>  
>  struct cgroup {
> diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
> index 312c6a8b55bb..ab096b884bbc 100644
> --- a/kernel/cgroup/cgroup.c
> +++ b/kernel/cgroup/cgroup.c
> @@ -3763,6 +3763,27 @@ static int cgroup_stat_show(struct seq_file *seq, void 
> *v)
>       return 0;
>  }
>  
> +static int cgroup_core_local_stat_show(struct seq_file *seq, void *v)
> +{
> +     struct cgroup *cgrp = seq_css(seq)->cgroup;
> +     unsigned int sequence;
> +     u64 freeze_time;
> +
> +     do {
> +             sequence = read_seqcount_begin(&cgrp->freezer.freeze_seq);
> +             freeze_time = cgrp->freezer.frozen_nsec;
> +             /* Add in current freezer interval if the cgroup is freezing. */
> +             if (test_bit(CGRP_FREEZE, &cgrp->flags))
> +                     freeze_time += (ktime_get_ns() -
> +                                     cgrp->freezer.freeze_start_nsec);
> +     } while (read_seqcount_retry(&cgrp->freezer.freeze_seq, sequence));
> +
> +     seq_printf(seq, "frozen_usec %llu\n",
> +                (unsigned long long) freeze_time / NSEC_PER_USEC);
> +
> +     return 0;
> +}
> +
>  #ifdef CONFIG_CGROUP_SCHED
>  /**
>   * cgroup_tryget_css - try to get a cgroup's css for the specified subsystem
> @@ -5354,6 +5375,11 @@ static struct cftype cgroup_base_files[] = {
>               .name = "cgroup.stat",
>               .seq_show = cgroup_stat_show,
>       },
> +     {
> +             .name = "cgroup.stat.local",
> +             .flags = CFTYPE_NOT_ON_ROOT,
> +             .seq_show = cgroup_core_local_stat_show,
> +     },
>       {
>               .name = "cgroup.freeze",
>               .flags = CFTYPE_NOT_ON_ROOT,
> @@ -5763,6 +5789,7 @@ static struct cgroup *cgroup_create(struct cgroup 
> *parent, const char *name,
>        * if the parent has to be frozen, the child has too.
>        */
>       cgrp->freezer.e_freeze = parent->freezer.e_freeze;
> +     seqcount_init(&cgrp->freezer.freeze_seq);
>       if (cgrp->freezer.e_freeze) {
>               /*
>                * Set the CGRP_FREEZE flag, so when a process will be
> @@ -5771,6 +5798,7 @@ static struct cgroup *cgroup_create(struct cgroup 
> *parent, const char *name,
>                * consider it frozen immediately.
>                */
>               set_bit(CGRP_FREEZE, &cgrp->flags);
> +             cgrp->freezer.freeze_start_nsec = ktime_get_ns();
>               set_bit(CGRP_FROZEN, &cgrp->flags);
>       }
>  
> diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c
> index bf1690a167dd..6c18854bff34 100644
> --- a/kernel/cgroup/freezer.c
> +++ b/kernel/cgroup/freezer.c
> @@ -171,7 +171,7 @@ static void cgroup_freeze_task(struct task_struct *task, 
> bool freeze)
>  /*
>   * Freeze or unfreeze all tasks in the given cgroup.
>   */
> -static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
> +static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze, u64 ts_nsec)
>  {
>       struct css_task_iter it;
>       struct task_struct *task;
> @@ -179,10 +179,16 @@ static void cgroup_do_freeze(struct cgroup *cgrp, bool 
> freeze)
>       lockdep_assert_held(&cgroup_mutex);
>  
>       spin_lock_irq(&css_set_lock);
> -     if (freeze)
> +     write_seqcount_begin(&cgrp->freezer.freeze_seq);
> +     if (freeze) {
>               set_bit(CGRP_FREEZE, &cgrp->flags);
> -     else
> +             cgrp->freezer.freeze_start_nsec = ts_nsec;
> +     } else {
>               clear_bit(CGRP_FREEZE, &cgrp->flags);
> +             cgrp->freezer.frozen_nsec += (ts_nsec -
> +                     cgrp->freezer.freeze_start_nsec);
> +     }
> +     write_seqcount_end(&cgrp->freezer.freeze_seq);
>       spin_unlock_irq(&css_set_lock);
> 

Hello Tiffany,

I wanted to check if there are any specific considerations regarding how we 
should input the ts_nsec
value.

Would it be possible to define this directly within the cgroup_do_freeze 
function rather than
passing it as a parameter? This approach might simplify the implementation and 
potentially improve
timing accuracy when it have lots of descendants.

-- 
Best regards,
Ridong

>       if (freeze)
> @@ -260,6 +266,7 @@ void cgroup_freeze(struct cgroup *cgrp, bool freeze)
>       struct cgroup *parent;
>       struct cgroup *dsct;
>       bool applied = false;
> +     u64 ts_nsec;
>       bool old_e;
>  
>       lockdep_assert_held(&cgroup_mutex);
> @@ -271,6 +278,7 @@ void cgroup_freeze(struct cgroup *cgrp, bool freeze)
>               return;
>  
>       cgrp->freezer.freeze = freeze;
> +     ts_nsec = ktime_get_ns();
>  
>       /*
>        * Propagate changes downwards the cgroup tree.
> @@ -298,7 +306,7 @@ void cgroup_freeze(struct cgroup *cgrp, bool freeze)
>               /*
>                * Do change actual state: freeze or unfreeze.
>                */
> -             cgroup_do_freeze(dsct, freeze);
> +             cgroup_do_freeze(dsct, freeze, ts_nsec);
>               applied = true;
>       }
>  


Reply via email to