On Fri, Mar 06, 2015 at 12:45:57PM +1100, Aleksa Sarai wrote:
> +struct pids {

This name is way too generic.  Please make it clear it's part of a
cgroup controller.

> +     struct pids *parent;
> +     struct cgroup_subsys_state css;

Please make css the first element.  The above prevents css <-> pids
pointer conversions from being noop.

> +
> +     atomic_long_t counter;
> +     long limit;

Why are these long?

> +};
> +
> +static inline struct pids *css_pids(struct cgroup_subsys_state *css)

No need for explicit inlines.

> +{
> +     return css ? container_of(css, struct pids, css) : NULL;
> +}
> +
> +static inline struct pids *task_pids(struct task_struct *task)
> +{
> +     return css_pids(task_css(task, pids_cgrp_id));
> +}
> +
> +static struct pids *parent_pids(struct pids *pids)
> +{
> +     return css_pids(pids->css.parent);
> +}

For all the above functions.

> +static int pids_css_online(struct cgroup_subsys_state *css)
> +{
> +     struct pids *pids = css_pids(css);
> +     long limit = -1;
> +
> +     pids->parent = parent_pids(pids);
> +     if (pids->parent)
> +             limit = pids->parent->limit;
> +
> +     pids->limit = limit;

Why would a child inherit the setting of the parent?  It's already
hierarchically limited by the parent.  There's no point in inheriting
the setting itself.

> +     atomic_long_set(&pids->counter, 0);
> +     return 0;
> +}
> +
> +static void pids_css_free(struct cgroup_subsys_state *css)
> +{
> +     kfree(css_pids(css));
> +}
> +
> +/**
> + * pids_cancel - uncharge the local pid count
> + * @pids: the pid cgroup state
> + * @num: the number of pids to cancel
> + *
> + * This function will WARN if the pid count goes under 0,
> + * but will not prevent it.
> + */
> +static void pids_cancel(struct pids *pids, int num)
> +{
> +     long new;
> +
> +     new = atomic_long_sub_return(num, &pids->counter);
> +
> +     /*
> +      * A negative count is invalid, but pids_cancel() can't fail.
> +      * So just emit a WARN.
> +      */
> +     WARN_ON(new < 0);

WARN_ON_ONCE() would be better.  Also, if you're gonna warn against
underflow, why not warn about overflow?  Just use
WARN_ON_ONCE(atomic_add_negative()).

> +}
> +
> +/**
> + * pids_uncharge - hierarchically uncharge the pid count
> + * @pids: the pid cgroup state
> + * @num: the number of pids to uncharge
> + *
> + * This function will not allow the pid count to go under 0,
> + * and will WARN if a caller attempts to do so.
> + */
> +static void pids_uncharge(struct pids *pids, int num)
> +{
> +     struct pids *p;
> +
> +     for (p = pids; p; p = p->parent)
> +             pids_cancel(p, num);
> +}

Does pids limit make sense in the root cgroup?

> +static int pids_try_charge(struct pids *pids, int num)
> +{
> +     struct pids *p, *fail;
> +
> +     for (p = pids; p; p = p->parent) {
> +             long new;
> +
> +             new = atomic_long_add_return(num, &p->counter);
> +
> +             if (p->limit == PIDS_UNLIMITED)
> +                     continue;

Huh?  So, the counter stays out of sync if unlimited?  What happens
when it gets set to something else later?

> +
> +             if (new > p->limit) {
> +                     atomic_long_sub(num, &p->counter);
> +                     fail = p;
> +                     goto revert;
> +             }
> +     }
> +
> +     return 0;
> +
> +revert:
> +     for (p = pids; p != fail; p = p->parent)
> +             pids_cancel(pids, num);
> +
> +     return -EAGAIN;
> +}
...
> +static void pids_exit(struct cgroup_subsys_state *css,
> +                   struct cgroup_subsys_state *old_css,
> +                   struct task_struct *task)
> +{
> +     struct pids *pids = css_pids(old_css);
> +
> +     /*
> +      * cgroup_exit() gets called as part of the cleanup code when
> +      * copy_process() fails. This should be ignored, because the
> +      * pids_cancel_fork callback already deals with the cgroup failed fork
> +      * case.
> +      */

Do we even need cancel call then?

> +     if (!(task->flags & PF_EXITING))
> +             return;
> +
> +     pids_uncharge(pids, 1);
> +}
> +
> +static int pids_write_max(struct cgroup_subsys_state *css,
> +                       struct cftype *cft, s64 val)
> +{
> +     struct pids *pids = css_pids(css);
> +
> +     /* PIDS_UNLIMITED is the only legal negative value. */
> +     if (val < 0 && val != PIDS_UNLIMITED)
> +             return -EINVAL;

Ugh... let's please not do negatives.  Please input and output "max"
for no limit conditions.

> +     /*
> +      * Limit updates don't need to be mutex'd, since they
> +      * are more of a "soft" limit in the sense that you can
> +      * set a limit which is smaller than the current count
> +      * to stop any *new* processes from spawning.
> +      */
> +     pids->limit = val;

So, on 32bit machines, we're assigning a 64bit integer to 32bit after
ensuring the 64bit is a positive number?

Overall, I'm not too confident this is going well.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to