Re: [PATCH v3 5/5] psi: introduce psi monitor

Johannes Weiner Tue, 29 Jan 2019 10:36:07 -0800

Hi Minchan,

good to see your name on the lists again :)


On Tue, Jan 29, 2019 at 08:53:58AM +0900, Minchan Kim wrote:
> On Thu, Jan 24, 2019 at 01:15:18PM -0800, Suren Baghdasaryan wrote:
> > @@ -68,6 +69,50 @@ struct psi_group_cpu {
> >     u32 times_prev[NR_PSI_STATES] ____cacheline_aligned_in_smp;
> >  };
> >  
> > +/* PSI growth tracking window */
> > +struct psi_window {
> > +   /* Window size in ns */
> > +   u64 size;
> 
> As rest of field are time, how about "interval" instead of size?

If it were "size" on its own, I would agree, but "window size" is an
existing term that works pretty well here. "window interval" wouldn't.

> > +   /* Start time of the current window in ns */
> > +   u64 start_time;
> > +
> > +   /* Value at the start of the window */
> > +   u64 start_value;
> 
> "value" is rather vague. starting_stall?

I'm not a fan of using stall here, because it reads like an event,
when it's really a time interval we're interested in.

For an abstraction that samples time intervals, value seems like a
pretty good, straight-forward name...

> > +
> > +   /* Value growth per previous window(s) */
> > +   u64 per_win_growth;
> 
> Rather than per, prev would be more meaninful, I think.
> How about prev_win_stall?

Agreed on the "per", but still not loving the stall. How about
prev_delta? prev_growth?

> > +struct psi_trigger {
> > +   /* PSI state being monitored by the trigger */
> > +   enum psi_states state;
> > +
> > +   /* User-spacified threshold in ns */
> > +   u64 threshold;
> > +
> > +   /* List node inside triggers list */
> > +   struct list_head node;
> > +
> > +   /* Backpointer needed during trigger destruction */
> > +   struct psi_group *group;
> > +
> > +   /* Wait queue for polling */
> > +   wait_queue_head_t event_wait;
> > +
> > +   /* Pending event flag */
> > +   int event;
> > +
> > +   /* Tracking window */
> > +   struct psi_window win;
> > +
> > +   /*
> > +    * Time last event was generated. Used for rate-limiting
> > +    * events to one per window
> > +    */
> > +   u64 last_event_time;
> > +};
> > +
> >  struct psi_group {
> >     /* Protects data used by the aggregator */
> >     struct mutex update_lock;
> > @@ -75,6 +120,8 @@ struct psi_group {
> >     /* Per-cpu task state & time tracking */
> >     struct psi_group_cpu __percpu *pcpu;
> >  
> > +   /* Periodic work control */
> > +   atomic_t polling;
> >     struct delayed_work clock_work;
> >  
> >     /* Total stall times observed */
> > @@ -85,6 +132,18 @@ struct psi_group {
> >     u64 avg_last_update;
> >     u64 avg_next_update;
> >     unsigned long avg[NR_PSI_STATES - 1][3];
> > +
> > +   /* Configured polling triggers */
> > +   struct list_head triggers;
> > +   u32 nr_triggers[NR_PSI_STATES - 1];
> > +   u32 trigger_mask;
> 
> This is a state we have an interest.
> How about trigger_states?

Sounds good to me, I'd also switch change_mask below to
changed_states:

        if (changed_states & trigger_states)
                /* engage! */

[ After reading the rest, I see Minchan proposed the same. ]

> > +   u64 trigger_min_period;
> > +
> > +   /* Polling state */
> > +   /* Total stall times at the start of monitor activation */
> > +   u64 polling_total[NR_PSI_STATES - 1];
> > +   u64 polling_next_update;
> > +   u64 polling_until;
> >  };
> >  
> >  #else /* CONFIG_PSI */
> > diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
> > index e8cd12c6a553..de3ac22a5e23 100644
> > --- a/kernel/cgroup/cgroup.c
> > +++ b/kernel/cgroup/cgroup.c
> > @@ -3464,7 +3464,101 @@ static int cgroup_cpu_pressure_show(struct seq_file 
> > *seq, void *v)
> >  {
> >     return psi_show(seq, &seq_css(seq)->cgroup->psi, PSI_CPU);
> >  }
> > -#endif
> > +
> > +static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char 
> > *buf,
> > +                                     size_t nbytes, enum psi_res res)
> > +{
> > +   enum psi_states state;
> > +   struct psi_trigger *old;
> > +   struct psi_trigger *new;
> > +   struct cgroup *cgrp;
> > +   u32 threshold_us;
> > +   u32 win_sz_us;
> 
> window_us?

We don't really encode units in variables in the rest of the code,
maybe we can drop it here as well.

Btw, it looks like the original reason for splitting up trigger_parse
and trigger_create seems gone from the code. Can we merge them again
and keep all those details out of the filesystem ->write methods?

        new = psi_trigger_create(group, buf, nbytes, res);

> > +   ssize_t ret;
> > +
> > +   cgrp = cgroup_kn_lock_live(of->kn, false);
> > +   if (!cgrp)
> > +           return -ENODEV;
> > +
> > +   cgroup_get(cgrp);
> > +   cgroup_kn_unlock(of->kn);
> > +
> > +   ret = psi_trigger_parse(buf, nbytes, res,
> > +                           &state, &threshold_us, &win_sz_us);
> > +   if (ret) {
> > +           cgroup_put(cgrp);
> > +           return ret;
> > +   }
> > +
> > +   new = psi_trigger_create(&cgrp->psi,
> > +                           state, threshold_us, win_sz_us);
> > +   if (IS_ERR(new)) {
> > +           cgroup_put(cgrp);
> > +           return PTR_ERR(new);
> > +   }
> > +
> > +   old = of->priv;
> > +   rcu_assign_pointer(of->priv, new);
> > +   if (old) {
> > +           synchronize_rcu();
> > +           psi_trigger_destroy(old);
> > +   }
> > +
> > +   cgroup_put(cgrp);
> > +
> > +   return nbytes;
> > +}
> > +
> > +static ssize_t cgroup_io_pressure_write(struct kernfs_open_file *of,
> > +                                     char *buf, size_t nbytes,
> > +                                     loff_t off)
> > +{
> > +   return cgroup_pressure_write(of, buf, nbytes, PSI_IO);
> > +}
> > +
> > +static ssize_t cgroup_memory_pressure_write(struct kernfs_open_file *of,
> > +                                     char *buf, size_t nbytes,
> > +                                     loff_t off)
> > +{
> > +   return cgroup_pressure_write(of, buf, nbytes, PSI_MEM);
> > +}
> > +
> > +static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
> > +                                     char *buf, size_t nbytes,
> > +                                     loff_t off)
> > +{
> > +   return cgroup_pressure_write(of, buf, nbytes, PSI_CPU);
> > +}
> > +
> > +static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
> > +                                     poll_table *pt)
> > +{
> > +   struct psi_trigger *t;
> > +   __poll_t ret;
> > +
> > +   rcu_read_lock();
> > +   t = rcu_dereference(of->priv);
> > +   if (t)
> > +           ret = psi_trigger_poll(t, of->file, pt);
> > +   else
> > +           ret = DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
> > +   rcu_read_unlock();
> > +
> > +   return ret;
> > +}
> > +
> > +static void cgroup_pressure_release(struct kernfs_open_file *of)
> > +{
> > +   struct psi_trigger *t = of->priv;
> > +
> > +   if (!t)
> > +           return;
> > +
> > +   rcu_assign_pointer(of->priv, NULL);
> > +   synchronize_rcu();
> > +   psi_trigger_destroy(t);
> > +}
> > +#endif /* CONFIG_PSI */
> >  
> >  static int cgroup_file_open(struct kernfs_open_file *of)
> >  {
> > @@ -4619,18 +4713,27 @@ static struct cftype cgroup_base_files[] = {
> >             .name = "io.pressure",
> >             .flags = CFTYPE_NOT_ON_ROOT,
> >             .seq_show = cgroup_io_pressure_show,
> > +           .write = cgroup_io_pressure_write,
> > +           .poll = cgroup_pressure_poll,
> > +           .release = cgroup_pressure_release,
> >     },
> >     {
> >             .name = "memory.pressure",
> >             .flags = CFTYPE_NOT_ON_ROOT,
> >             .seq_show = cgroup_memory_pressure_show,
> > +           .write = cgroup_memory_pressure_write,
> > +           .poll = cgroup_pressure_poll,
> > +           .release = cgroup_pressure_release,
> >     },
> >     {
> >             .name = "cpu.pressure",
> >             .flags = CFTYPE_NOT_ON_ROOT,
> >             .seq_show = cgroup_cpu_pressure_show,
> > +           .write = cgroup_cpu_pressure_write,
> > +           .poll = cgroup_pressure_poll,
> > +           .release = cgroup_pressure_release,
> >     },
> > -#endif
> > +#endif /* CONFIG_PSI */
> >     { }     /* terminate */
> >  };
> >  
> > diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
> > index c366503ba135..fefb98f19a80 100644
> > --- a/kernel/sched/psi.c
> > +++ b/kernel/sched/psi.c
> > @@ -4,6 +4,9 @@
> >   * Copyright (c) 2018 Facebook, Inc.
> >   * Author: Johannes Weiner <han...@cmpxchg.org>
> >   *
> > + * Polling support by Suren Baghdasaryan <sur...@google.com>
> > + * Copyright (c) 2018 Google, Inc.
> > + *
> >   * When CPU, memory and IO are contended, tasks experience delays that
> >   * reduce throughput and introduce latencies into the workload. Memory
> >   * and IO contention, in addition, can cause a full loss of forward
> > @@ -126,11 +129,16 @@
> >  
> >  #include <linux/sched/loadavg.h>
> >  #include <linux/seq_file.h>
> > +#include <linux/eventfd.h>
> >  #include <linux/proc_fs.h>
> >  #include <linux/seqlock.h>
> > +#include <linux/uaccess.h>
> >  #include <linux/cgroup.h>
> >  #include <linux/module.h>
> >  #include <linux/sched.h>
> > +#include <linux/ctype.h>
> > +#include <linux/file.h>
> > +#include <linux/poll.h>
> >  #include <linux/psi.h>
> >  #include "sched.h"
> >  
> > @@ -150,11 +158,16 @@ static int __init setup_psi(char *str)
> >  __setup("psi=", setup_psi);
> >  
> >  /* Running averages - we need to be higher-res than loadavg */
> > -#define PSI_FREQ   (2*HZ+1)        /* 2 sec intervals */
> > +#define PSI_FREQ   (2*HZ+1UL)      /* 2 sec intervals */
> >  #define EXP_10s            1677            /* 1/exp(2s/10s) as fixed-point 
> > */
> >  #define EXP_60s            1981            /* 1/exp(2s/60s) */
> >  #define EXP_300s   2034            /* 1/exp(2s/300s) */
> >  
> > +/* PSI trigger definitions */
> > +#define PSI_TRIG_MIN_WIN_US 500000         /* Min window size is 500ms */
> > +#define PSI_TRIG_MAX_WIN_US 10000000       /* Max window size is 10s */
> > +#define PSI_TRIG_UPDATES_PER_WIN 10                /* 10 updates per 
> > window */
> 
> To me, it's rather long.
> How about WINDOW_MIN_US, WINDOW_MAX_US, UPDATES_PER_WINDOW?

Sounds good to me too. I'm just ambivalent on the _US suffix. Dealer's
choice, though.

> > +
> >  /* Sampling frequency in nanoseconds */
> >  static u64 psi_period __read_mostly;
> >  
> > @@ -173,8 +186,17 @@ static void group_init(struct psi_group *group)
> >     for_each_possible_cpu(cpu)
> >             seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq);
> >     group->avg_next_update = sched_clock() + psi_period;
> > +   atomic_set(&group->polling, 0);
> >     INIT_DELAYED_WORK(&group->clock_work, psi_update_work);
> >     mutex_init(&group->update_lock);
> > +   /* Init trigger-related members */
> > +   INIT_LIST_HEAD(&group->triggers);
> > +   memset(group->nr_triggers, 0, sizeof(group->nr_triggers));
> > +   group->trigger_mask = 0;
> > +   group->trigger_min_period = U32_MAX;
> > +   memset(group->polling_total, 0, sizeof(group->polling_total));
> > +   group->polling_next_update = ULLONG_MAX;
> > +   group->polling_until = 0;
> >  }
> >  
> >  void __init psi_init(void)
> > @@ -209,10 +231,11 @@ static bool test_state(unsigned int *tasks, enum 
> > psi_states state)
> >     }
> >  }
> >  
> > -static void get_recent_times(struct psi_group *group, int cpu, u32 *times)
> > +static u32 get_recent_times(struct psi_group *group, int cpu, u32 *times)
> 
> Rather awkward to return change_mask when we consider function name as
> get_recent_times It would be better to add additional parameter
> instead of return value.

Good suggestion, I have to agree this would be nicer.

> >  {
> >     struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu);
> >     u64 now, state_start;
> > +   u32 change_mask = 0;
> >     enum psi_states s;
> >     unsigned int seq;
> >     u32 state_mask;
> > @@ -245,7 +268,11 @@ static void get_recent_times(struct psi_group *group, 
> > int cpu, u32 *times)
> >             groupc->times_prev[s] = times[s];
> >  
> >             times[s] = delta;
> > +           if (delta)
> > +                   change_mask |= (1 << s);
> >     }
> > +
> > +   return change_mask;
> >  }
> >  
> >  static void calc_avgs(unsigned long avg[3], int missed_periods,
> > @@ -268,17 +295,14 @@ static void calc_avgs(unsigned long avg[3], int 
> > missed_periods,
> >     avg[2] = calc_load(avg[2], EXP_300s, pct);
> >  }
> >  
> > -static bool update_stats(struct psi_group *group)
> > +static u32 collect_percpu_times(struct psi_group *group)
> 
> Not sure it's a good idea to add "implementation facility" in here.
> How about update_stall_time with additional parameter of
> "[changed|updated]_states?
> 
> IOW,
> static void update_stall_times(struct psi_group *group, u32 *changed_states)

I disagree on this one. collect_percpu_times() isn't too detailed of a
name, but it does reflect the complexity/cost of the function and the
structure that is being aggregated, which is a good thing.

But the return-by-parameter is a good idea.

> >     u64 deltas[NR_PSI_STATES - 1] = { 0, };
> > -   unsigned long missed_periods = 0;
> >     unsigned long nonidle_total = 0;
> > -   u64 now, expires, period;
> > +   u32 change_mask = 0;
> >     int cpu;
> >     int s;
> >  
> > -   mutex_lock(&group->update_lock);
> > -
> >     /*
> >      * Collect the per-cpu time buckets and average them into a
> >      * single time sample that is normalized to wallclock time.
> > @@ -291,7 +315,7 @@ static bool update_stats(struct psi_group *group)
> >             u32 times[NR_PSI_STATES];
> >             u32 nonidle;
> >  
> > -           get_recent_times(group, cpu, times);
> > +           change_mask |= get_recent_times(group, cpu, times);
> >  
> >             nonidle = nsecs_to_jiffies(times[PSI_NONIDLE]);
> >             nonidle_total += nonidle;
> > @@ -316,11 +340,18 @@ static bool update_stats(struct psi_group *group)
> >     for (s = 0; s < NR_PSI_STATES - 1; s++)
> >             group->total[s] += div_u64(deltas[s], max(nonidle_total, 1UL));
> >  
> > +   return change_mask;
> > +}
> > +
> > +static u64 calculate_averages(struct psi_group *group, u64 now)
> 
>                                                               time?
> 
> The function name is still awkward to me.
> If someone see this function, he will expect return value as "average", not 
> next_update.
> If we want to have next_update as return value, it would better to rename 
> *update_avgs*.

update_averages() would be nice, agreed.

But I disagree on the now -> time. time is really vague - could be a
random timestamp or a period. We use "now" everywhere in this code to
mean the current time (cpu clock in cpu-local paths, sched clock for
global stuff), so let's keep it here as well.

> > +{
> > +   unsigned long missed_periods = 0;
> > +   u64 expires, period;
> > +   u64 avg_next_update;
> > +   int s;
> > +
> >     /* avgX= */
> > -   now = sched_clock();
> >     expires = group->avg_next_update;
> > -   if (now < expires)
> > -           goto out;
> >     if (now - expires > psi_period)
> >             missed_periods = div_u64(now - expires, psi_period);
> >  
> > @@ -331,7 +362,7 @@ static bool update_stats(struct psi_group *group)
> >      * But the deltas we sample out of the per-cpu buckets above
> >      * are based on the actual time elapsing between clock ticks.
> >      */
> > -   group->avg_next_update = expires + ((1 + missed_periods) * psi_period);
> > +   avg_next_update = expires + ((1 + missed_periods) * psi_period);
> >     period = now - (group->avg_last_update + (missed_periods * psi_period));
> >     group->avg_last_update = now;
> >  
> > @@ -361,20 +392,237 @@ static bool update_stats(struct psi_group *group)
> >             group->avg_total[s] += sample;
> >             calc_avgs(group->avg[s], missed_periods, sample, period);
> >     }
> > -out:
> > -   mutex_unlock(&group->update_lock);
> > -   return nonidle_total;
> > +
> > +   return avg_next_update;
> > +}
> > +
> > +/* Trigger tracking window manupulations */
> > +static void window_init(struct psi_window *win, u64 now, u64 value)
> > +{
> > +   win->start_value = value;
> > +   win->start_time = now;
> > +   win->per_win_growth = 0;
> > +}
> > +
> > +/*
> > + * PSI growth tracking window update and growth calculation routine.
> 
> Let's add empty line here.

Agreed.

> > + * This approximates a sliding tracking window by interpolating
> > + * partially elapsed windows using historical growth data from the
> > + * previous intervals. This minimizes memory requirements (by not storing
> > + * all the intermediate values in the previous window) and simplifies
> > + * the calculations. It works well because PSI signal changes only in
> > + * positive direction and over relatively small window sizes the growth
> > + * is close to linear.
> > + */
> > +static u64 window_update(struct psi_window *win, u64 now, u64 value)
> 
> Hope to change now as just time for function.
>
> Insetad of value, couldn't we use more concrete naming?
> Maybe stall_time or just stall?

Disagreed on both :)

> > +{
> > +   u64 interval;
> 
> elapsed?

Hm, elapsed is a bit better, but how about period? We use that in the
averages code for the same functionality.

> > +   u64 growth;
> > +
> > +   interval = now - win->start_time;
> > +   growth = value - win->start_value;
> > +   /*
> > +    * After each tracking window passes win->start_value and
> > +    * win->start_time get reset and win->per_win_growth stores
> > +    * the average per-window growth of the previous window.
> > +    * win->per_win_growth is then used to interpolate additional
> > +    * growth from the previous window assuming it was linear.
> > +    */
> > +   if (interval > win->size) {
> > +           win->per_win_growth = growth;
> > +           win->start_value = value;
> > +           win->start_time = now;
> 
> We can use window_init via adding per_win_growth in the function
> parameter. Maybe, window_reset would be better function name.
> 
> > +   } else {
> > +           u32 unelapsed;
> 
> remaining? remained?

Yup, much better.

> > +
> > +           unelapsed = win->size - interval;
> > +           growth += div_u64(win->per_win_growth * unelapsed, win->size);
> > +   }
> > +
> > +   return growth;
> > +}
> > +
> > +static void init_triggers(struct psi_group *group, u64 now)
> > +{
> > +   struct psi_trigger *t;
> > +
> > +   list_for_each_entry(t, &group->triggers, node)
> > +           window_init(&t->win, now, group->total[t->state]);
> > +   memcpy(group->polling_total, group->total,
> > +              sizeof(group->polling_total));
> > +   group->polling_next_update = now + group->trigger_min_period;
> > +}
> > +
> > +static u64 poll_triggers(struct psi_group *group, u64 now)
> 
> How about update_[poll|trigger]_stat?

update_triggers()? The signature already matches the update_averages()
one, so we might as well do the same thing there I guess.

> > +{
> > +   struct psi_trigger *t;
> > +   bool new_stall = false;
> > +
> > +   /*
> > +    * On subsequent updates, calculate growth deltas and let
> > +    * watchers know when their specified thresholds are exceeded.
> > +    */
> > +   list_for_each_entry(t, &group->triggers, node) {
> > +           u64 growth;
> > +
> > +           /* Check for stall activity */
> > +           if (group->polling_total[t->state] == group->total[t->state])
> > +                   continue;
> > +
> > +           /*
> > +            * Multiple triggers might be looking at the same state,
> > +            * remember to update group->polling_total[] once we've
> > +            * been through all of them. Also remember to extend the
> > +            * polling time if we see new stall activity.
> > +            */
> > +           new_stall = true;
> > +
> > +           /* Calculate growth since last update */
> > +           growth = window_update(&t->win, now, group->total[t->state]);
> > +           if (growth < t->threshold)
> > +                   continue;
> > +
> > +           /* Limit event signaling to once per window */
> > +           if (now < t->last_event_time + t->win.size)
> > +                   continue;
> > +
> > +           /* Generate an event */
> > +           if (cmpxchg(&t->event, 0, 1) == 0)
> > +                   wake_up_interruptible(&t->event_wait);
> > +           t->last_event_time = now;
> > +   }
> > +
> > +   if (new_stall) {
> > +           memcpy(group->polling_total, group->total,
> > +                      sizeof(group->polling_total));
> > +   }
> > +
> > +   return now + group->trigger_min_period;
> >  }
> >  
> > +/*
> > + * psi_update_work represents slowpath accounting part while 
> > psi_group_change
> > + * represents hotpath part. There are two potential races between them:
> > + * 1. Changes to group->polling when slowpath checks for new stall, then 
> > hotpath
> > + *    records new stall and then slowpath resets group->polling flag. This 
> > leads
> > + *    to the exit from the polling mode while monitored state is still 
> > changing.
> > + * 2. Slowpath overwriting an immediate update scheduled from the hotpath 
> > with
> > + *    a regular update further in the future and missing the immediate 
> > update.
> > + * Both races are handled with a retry cycle in the slowpath:
> > + *
> > + *    HOTPATH:                         |    SLOWPATH:
> > + *                                     |
> > + * A) times[cpu] += delta              | E) delta = times[*]
> > + * B) start_poll = (delta[poll_mask] &&|    if delta[poll_mask]:
> > + *      cmpxchg(g->polling, 0, 1) == 0)| F)   polling_until = now + 
> > grace_period
> > + *    if start_poll:                   |    if now > polling_until:
> > + * C)   mod_delayed_work(1)            |      if g->polling:
> > + *     else if !delayed_work_pending():| G)     g->polling = polling = 0
> > + * D)   schedule_delayed_work(PSI_FREQ)|        smp_mb
> > + *                                     | H)     goto SLOWPATH
> > + *                                     |    else:
> > + *                                     |      if !g->polling:
> > + *                                     | I)     g->polling = polling = 1
> > + *                                     | J) if delta && first_pass:
> > + *                                     |      next_avg = 
> > calculate_averages()
> > + *                                     |      if polling:
> > + *                                     |        next_poll = poll_triggers()
> > + *                                     |    if (delta && first_pass) || 
> > polling:
> > + *                                     | K)   mod_delayed_work(
> > + *                                     |          min(next_avg, next_poll))
> > + *                                     |      if !polling:
> > + *                                     |        first_pass = false
> > + *                                     | L)     goto SLOWPATH
> > + *
> > + * Race #1 is represented by (EABGD) sequence in which case slowpath 
> > deactivates
> > + * polling mode because it misses new monitored stall and hotpath doesn't
> > + * activate it because at (B) g->polling is not yet reset by slowpath in 
> > (G).
> > + * This race is handled by the (H) retry, which in the race described above
> > + * results in the new sequence of (EABGDHEIK) that reactivates polling 
> > mode.
> > + *
> > + * Race #2 is represented by polling==false && (JABCK) sequence which 
> > overwrites
> > + * immediate update scheduled at (C) with a later (next_avg) update 
> > scheduled at
> > + * (K). This race is handled by the (L) retry which results in the new 
> > sequence
> > + * of polling==false && (JABCKLEIK) that reactivates polling mode and
> > + * reschedules the next polling update (next_poll).
> > + *
> > + * Note that retries can't result in an infinite loop because retry #1 
> > happens
> > + * only during polling reactivation and retry #2 happens only on the first 
> > pass.
> > + * Constant reactivations are impossible because polling will stay active 
> > for at
> > + * least grace_period. Worst case scenario involves two retries (HEJKLE)
> > + */
> >  static void psi_update_work(struct work_struct *work)
> >  {
> >     struct delayed_work *dwork;
> >     struct psi_group *group;
> > +   bool first_pass = true;
> > +   u64 next_update;
> > +   u32 change_mask;
> 
> How about [changed|updated]_states?

changed_states sounds good to me.

Re: [PATCH v3 5/5] psi: introduce psi monitor

Reply via email to