Hi,

On 12/22/2023 8:21 PM, Philo Lu wrote:
> BPF_MAP_TYPE_RELAY is implemented based on relay interface, which
> creates per-cpu buffer to transfer data. Each buffer is essentially a
> list of fix-sized sub-buffers, and is exposed to user space as files in
> debugfs. attr->max_entries is used as subbuf size and attr->map_extra is
> used as subbuf num. Currently, the default value of subbuf num is 8.
>
> The data can be accessed by read or mmap via these files. For example,
> if there are 2 cpus, files could be `/sys/kernel/debug/mydir/my_rmap0`
> and `/sys/kernel/debug/mydir/my_rmap1`.
>
> Buffer-only mode is used to create the relay map, which just allocates
> the buffer without creating user-space files. Then user can setup the
> files with map_update_elem, thus allowing user to define the directory
> name in debugfs. map_update_elem is implemented in the following patch.
>
> A new map flag named BPF_F_OVERWRITE is introduced to set overwrite mode
> of relay map.

Beside adding a new map type, could we consider only use kfuncs to
support the creation of rchan and the write of rchan ? I think
bpf_cpumask will be a good reference.
>
> Signed-off-by: Philo Lu <lu...@linux.alibaba.com>
> ---
>  include/linux/bpf_types.h |   3 +
>  include/uapi/linux/bpf.h  |   7 ++
>  kernel/bpf/Makefile       |   3 +
>  kernel/bpf/relaymap.c     | 157 ++++++++++++++++++++++++++++++++++++++
>  kernel/bpf/syscall.c      |   1 +
>  5 files changed, 171 insertions(+)
>  create mode 100644 kernel/bpf/relaymap.c
>

SNIP
> diff --git a/kernel/bpf/relaymap.c b/kernel/bpf/relaymap.c
> new file mode 100644
> index 000000000000..d0adc7f67758
> --- /dev/null
> +++ b/kernel/bpf/relaymap.c
> @@ -0,0 +1,157 @@
> +#include <linux/cpumask.h>
> +#include <linux/debugfs.h>
> +#include <linux/filter.h>
> +#include <linux/relay.h>
> +#include <linux/slab.h>
> +#include <linux/bpf.h>
> +#include <linux/err.h>
> +
> +#define RELAY_CREATE_FLAG_MASK (BPF_F_OVERWRITE)
> +
> +struct bpf_relay_map {
> +     struct bpf_map map;
> +     struct rchan *relay_chan;
> +     struct rchan_callbacks relay_cb;
> +};

It seems that there is no need to add relay_cb in bpf_relay_map. We
could define two kinds of rchan_callbacks: one for non-overwrite mode
and another one for overwrite mode.
> +
> +static struct dentry *create_buf_file_handler(const char *filename,
> +                                                        struct dentry 
> *parent, umode_t mode,
> +                                                        struct rchan_buf 
> *buf, int *is_global)
> +{
> +     /* Because we do relay_late_setup_files(), create_buf_file(NULL, NULL, 
> ...)
> +      * will be called by relay_open.
> +      */
> +     if (!filename)
> +             return NULL;
> +
> +     return debugfs_create_file(filename, mode, parent, buf,
> +                                &relay_file_operations);
> +}
> +
> +static int remove_buf_file_handler(struct dentry *dentry)
> +{
> +     debugfs_remove(dentry);
> +     return 0;
> +}
> +
> +/* For non-overwrite, use default subbuf_start cb */
> +static int subbuf_start_overwrite(struct rchan_buf *buf, void *subbuf,
> +                                                void *prev_subbuf, size_t 
> prev_padding)
> +{
> +     return 1;
> +}
> +
> +/* bpf_attr is used as follows:
> + * - key size: must be 0
> + * - value size: value will be used as directory name by map_update_elem
> + *   (to create relay files). If passed as 0, it will be set to NAME_MAX as
> + *   default
> + *
> + * - max_entries: subbuf size
> + * - map_extra: subbuf num, default as 8
> + *
> + * When alloc, we do not set up relay files considering dir_name conflicts.
> + * Instead we use relay_late_setup_files() in map_update_elem(), and thus the
> + * value is used as dir_name, and map->name is used as base_filename.
> + */
> +static struct bpf_map *relay_map_alloc(union bpf_attr *attr)
> +{
> +     struct bpf_relay_map *rmap;
> +
> +     if (unlikely(attr->map_flags & ~RELAY_CREATE_FLAG_MASK))
> +             return ERR_PTR(-EINVAL);
> +
> +     /* key size must be 0 in relay map */
> +     if (unlikely(attr->key_size))
> +             return ERR_PTR(-EINVAL);
> +
> +     if (unlikely(attr->value_size > NAME_MAX)) {
> +             pr_warn("value_size should be no more than %d\n", NAME_MAX);
> +             return ERR_PTR(-EINVAL);
> +     } else if (attr->value_size == 0)
> +             attr->value_size = NAME_MAX;
> +
> +     /* set default subbuf num */
> +     attr->map_extra = attr->map_extra & UINT_MAX;

Should we reject invalid map_extra and return -EINVAL instead ?
> +     if (!attr->map_extra)
> +             attr->map_extra = 8;
> +
> +     if (!attr->map_name || strlen(attr->map_name) == 0)
> +             return ERR_PTR(-EINVAL);
> +
> +     rmap = bpf_map_area_alloc(sizeof(*rmap), NUMA_NO_NODE);
> +     if (!rmap)
> +             return ERR_PTR(-ENOMEM);
> +
> +     bpf_map_init_from_attr(&rmap->map, attr);
> +
> +     rmap->relay_cb.create_buf_file = create_buf_file_handler;
> +     rmap->relay_cb.remove_buf_file = remove_buf_file_handler;
> +     if (attr->map_flags & BPF_F_OVERWRITE)
> +             rmap->relay_cb.subbuf_start = subbuf_start_overwrite;
> +
> +     rmap->relay_chan = relay_open(NULL, NULL,
> +                                                     attr->max_entries, 
> attr->map_extra,
> +                                                     &rmap->relay_cb, NULL);
> +     if (!rmap->relay_chan)
> +             return ERR_PTR(-EINVAL);

Need to free rmap.
> +
> +     return &rmap->map;
> +}
> +
> +static void relay_map_free(struct bpf_map *map)
> +{
> +     struct bpf_relay_map *rmap;
> +
> +     rmap = container_of(map, struct bpf_relay_map, map);
> +     relay_close(rmap->relay_chan);
> +     debugfs_remove_recursive(rmap->relay_chan->parent);

rmap->relay_chan may be freed by relay_close(), so it is not safe to
dereference relay_chan->parent here. And is debugfs_remove_recursive()
necessary here ?
> +     kfree(rmap);
> +}
> +
> +static void *relay_map_lookup_elem(struct bpf_map *map, void *key)
> +{
> +     return ERR_PTR(-EOPNOTSUPP);
> +}
> +
> +static long relay_map_update_elem(struct bpf_map *map, void *key, void 
> *value,
> +                                u64 flags)
> +{
> +     return -EOPNOTSUPP;
> +}
> +
> +static long relay_map_delete_elem(struct bpf_map *map, void *key)
> +{
> +     return -EOPNOTSUPP;
> +}
> +
> +static int relay_map_get_next_key(struct bpf_map *map, void *key,
> +                                 void *next_key)
> +{
> +     return -EOPNOTSUPP;
> +}
> +
> +static u64 relay_map_mem_usage(const struct bpf_map *map)
> +{
> +     struct bpf_relay_map *rmap;
> +     u64 usage = sizeof(struct bpf_relay_map);
> +
> +     rmap = container_of(map, struct bpf_relay_map, map);
> +     usage += sizeof(struct rchan);
> +     usage += (sizeof(struct rchan_buf) + rmap->relay_chan->alloc_size)
> +                      * num_online_cpus();
> +     return usage;
> +}
> +
> +BTF_ID_LIST_SINGLE(relay_map_btf_ids, struct, bpf_relay_map)
> +const struct bpf_map_ops relay_map_ops = {
> +     .map_meta_equal = bpf_map_meta_equal,
> +     .map_alloc = relay_map_alloc,
> +     .map_free = relay_map_free,
> +     .map_lookup_elem = relay_map_lookup_elem,
> +     .map_update_elem = relay_map_update_elem,
> +     .map_delete_elem = relay_map_delete_elem,
> +     .map_get_next_key = relay_map_get_next_key,
> +     .map_mem_usage = relay_map_mem_usage,
> +     .map_btf_id = &relay_map_btf_ids[0],
> +};
>


Reply via email to