Hi,

Overall the patch looks good to me. I have a few comments below.

On 20/02/2016 13:00, Parav Pandit wrote:
> Resource pool is created/destroyed dynamically whenever
> charging/uncharging occurs respectively and whenever user
> configuration is done. Its a tradeoff of memory vs little more code
Its -> It's
> space that creates resource pool whenever necessary,
> instead of creating them during cgroup creation and device registration
> time.
> 
> Signed-off-by: Parav Pandit <pandit.pa...@gmail.com>

> diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h
> new file mode 100644
> index 0000000..b370733
> --- /dev/null
> +++ b/include/linux/cgroup_rdma.h

> +struct rdmacg_device {
> +     struct rdmacg_pool_info pool_info;
> +     struct list_head        rdmacg_list;
> +     struct list_head        rpool_head;
> +     spinlock_t              rpool_lock;     /* protects rsource pool list */
rsource -> resource

> +     char                    *name;
> +};
> +
> +/* APIs for RDMA/IB stack to publish when a device wants to
> + * participate in resource accounting
> + */
> +int rdmacg_register_device(struct rdmacg_device *device);
> +void rdmacg_unregister_device(struct rdmacg_device *device);
> +
> +/* APIs for RDMA/IB stack to charge/uncharge pool specific resources */
> +int rdmacg_try_charge(struct rdma_cgroup **rdmacg,
> +                   struct rdmacg_device *device,
> +                   int resource_index,
> +                   int num);
> +void rdmacg_uncharge(struct rdma_cgroup *cg,
> +                  struct rdmacg_device *device,
> +                  int resource_index,
> +                  int num);
> +void rdmacg_query_limit(struct rdmacg_device *device,
> +                     int *limits, int max_count);
You can drop the max_count parameter and require the caller to
always provide pool_info->table_len items, can't you?

> +
> +#endif       /* CONFIG_CGROUP_RDMA */
> +#endif       /* _CGROUP_RDMA_H */
> diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
> index 0df0336a..d0e597c 100644
> --- a/include/linux/cgroup_subsys.h
> +++ b/include/linux/cgroup_subsys.h
> @@ -56,6 +56,10 @@ SUBSYS(hugetlb)
>  SUBSYS(pids)
>  #endif
>  
> +#if IS_ENABLED(CONFIG_CGROUP_RDMA)
> +SUBSYS(rdma)
> +#endif
> +
>  /*
>   * The following subsystems are not supported on the default hierarchy.
>   */
> diff --git a/init/Kconfig b/init/Kconfig
> index 2232080..1741b65 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1054,6 +1054,16 @@ config CGROUP_PIDS
>         since the PIDs limit only affects a process's ability to fork, not to
>         attach to a cgroup.
>  
> +config CGROUP_RDMA
> +     bool "RDMA controller"
> +     help
> +       Provides enforcement of RDMA resources defined by IB stack.
> +       It is fairly easy for consumers to exhaust RDMA resources, which
> +       can result into resource unavailibility to other consumers.
unavailibility -> unavailability
> +       RDMA controller is designed to stop this from happening.
> +       Attaching processes with active RDMA resources to the cgroup
> +       hierarchy is allowed even if can cross the hierarchy's limit.
> +
>  config CGROUP_FREEZER
>       bool "Freezer controller"
>       help
> diff --git a/kernel/Makefile b/kernel/Makefile
> index baa55e5..501f5df 100644

> +/**
> + * uncharge_cg_resource - uncharge resource for rdma cgroup
> + * @cg: pointer to cg to uncharge and all parents in hierarchy
It only uncharges a single cg, right?
> + * @device: pointer to ib device
> + * @index: index of the resource to uncharge in cg (resource pool)
> + * @num: the number of rdma resource to uncharge
> + *
> + * It also frees the resource pool in the hierarchy for the resource pool
> + * which was created as part of charing operation.
charing -> charging
> + */
> +static void uncharge_cg_resource(struct rdma_cgroup *cg,
> +                              struct rdmacg_device *device,
> +                              int index, int num)
> +{
> +     struct rdmacg_resource_pool *rpool;
> +     struct rdmacg_pool_info *pool_info = &device->pool_info;
> +
> +     spin_lock(&cg->rpool_list_lock);
> +     rpool = find_cg_rpool_locked(cg, device);
Is it possible for rpool to be NULL?

> +
> +     /*
> +      * A negative count (or overflow) is invalid,
> +      * it indicates a bug in the rdma controller.
> +      */
> +     rpool->resources[index].usage -= num;
> +
> +     WARN_ON_ONCE(rpool->resources[index].usage < 0);
> +     rpool->refcnt--;
> +     if (rpool->refcnt == 0 && rpool->num_max_cnt == pool_info->table_len) {
> +             /*
> +              * No user of the rpool and all entries are set to max, so
> +              * safe to delete this rpool.
> +              */
> +             list_del(&rpool->cg_list);
> +             spin_unlock(&cg->rpool_list_lock);
> +
> +             free_cg_rpool(rpool);
> +             return;
> +     }
> +     spin_unlock(&cg->rpool_list_lock);
> +}

> +/**
> + * charge_cg_resource - charge resource for rdma cgroup
> + * @cg: pointer to cg to charge
> + * @device: pointer to rdmacg device
> + * @index: index of the resource to charge in cg (resource pool)
> + * @num: the number of rdma resource to charge
> + */
> +static int charge_cg_resource(struct rdma_cgroup *cg,
> +                           struct rdmacg_device *device,
> +                           int index, int num)
> +{
> +     struct rdmacg_resource_pool *rpool;
> +     s64 new;
> +     int ret = 0;
> +
> +retry:
> +     spin_lock(&cg->rpool_list_lock);
> +     rpool = find_cg_rpool_locked(cg, device);
> +     if (!rpool) {
> +             spin_unlock(&cg->rpool_list_lock);
> +             ret = alloc_cg_rpool(cg, device);
> +             if (ret)
> +                     goto err;
> +             else
> +                     goto retry;
Instead of retrying after allocation of a new rpool, why not just return the
newly allocated rpool (or the existing one) from alloc_cg_rpool?

> +     }
> +     new = num + rpool->resources[index].usage;
> +     if (new > rpool->resources[index].max) {
> +             ret = -EAGAIN;
> +     } else {
> +             rpool->refcnt++;
> +             rpool->resources[index].usage = new;
> +     }
> +     spin_unlock(&cg->rpool_list_lock);
> +err:
> +     return ret;
> +}

> +static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of,
> +                                    char *buf, size_t nbytes, loff_t off)
> +{
> +     struct rdma_cgroup *cg = css_rdmacg(of_css(of));
> +     const char *dev_name;
> +     struct rdmacg_resource_pool *rpool;
> +     struct rdmacg_device *device;
> +     char *options = strstrip(buf);
> +     struct rdmacg_pool_info *pool_info;
> +     u64 enables = 0;
This limits the number of resources to 64. Sounds fine to me, but I think 
there should be a check somewhere (maybe in rdmacg_register_device()?) to
make sure someone doesn't pass too many resources.
 
> +     int *new_limits;
> +     int i = 0, ret = 0;
> +
> +     /* extract the device name first */
> +     dev_name = strsep(&options, " ");
> +     if (!dev_name) {
> +             ret = -EINVAL;
> +             goto err;
> +     }
> +
> +     /* acquire lock to synchronize with hot plug devices */
> +     mutex_lock(&dev_mutex);
> +
> +     device = rdmacg_get_device_locked(dev_name);
> +     if (!device) {
> +             ret = -ENODEV;
> +             goto parse_err;
> +     }
> +
> +     pool_info = &device->pool_info;
> +
> +     new_limits = kcalloc(pool_info->table_len, sizeof(int), GFP_KERNEL);
> +     if (!new_limits) {
> +             ret = -ENOMEM;
> +             goto parse_err;
> +     }
> +
> +     ret = rdmacg_parse_limits(options, pool_info, new_limits, &enables);
> +     if (ret)
> +             goto opt_err;
> +
> +retry:
> +     spin_lock(&cg->rpool_list_lock);
> +     rpool = find_cg_rpool_locked(cg, device);
> +     if (!rpool) {
> +             spin_unlock(&cg->rpool_list_lock);
> +             ret = alloc_cg_rpool(cg, device);
> +             if (ret)
> +                     goto opt_err;
> +             else
> +                     goto retry;
You can avoid the retry here too. Perhaps this can go into a function.

> +     }
> +
> +     /* now set the new limits of the rpool */
> +     while (enables) {
> +             /* if user set the limit, enables bit is set */
> +             if (enables & BIT(i)) {
> +                     enables &= ~BIT(i);
> +                     set_resource_limit(rpool, i, new_limits[i]);
> +             }
> +             i++;
> +     }
> +     if (rpool->refcnt == 0 &&
> +         rpool->num_max_cnt == pool_info->table_len) {
> +             /*
> +              * No user of the rpool and all entries are
> +              * set to max, so safe to delete this rpool.
> +              */
> +             list_del(&rpool->cg_list);
> +             spin_unlock(&cg->rpool_list_lock);
> +             free_cg_rpool(rpool);
> +     } else {
> +             spin_unlock(&cg->rpool_list_lock);
> +     }
You should consider putting this piece of code in a function (the
check of the reference counts and release of the rpool).

> +
> +opt_err:
> +     kfree(new_limits);
> +parse_err:
> +     mutex_unlock(&dev_mutex);
> +err:
> +     return ret ?: nbytes;
> +}
> +

> +
> +static int print_rpool_values(struct seq_file *sf,
This can return void.

> +                           struct rdmacg_pool_info *pool_info,
> +                           u32 *value_tbl)
> +{
> +     int i;
> +
> +     for (i = 0; i < pool_info->table_len; i++) {
> +             seq_puts(sf, pool_info->resource_name_table[i]);
> +             seq_putc(sf, '=');
> +             if (value_tbl[i] == S32_MAX)
> +                     seq_puts(sf, RDMACG_MAX_STR);
> +             else
> +                     seq_printf(sf, "%d", value_tbl[i]);
> +             seq_putc(sf, ' ');
> +     }
> +     return 0;
> +}
> +

Thanks,
Haggai
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to