Implement two new kinds of maps that support the peek, push and pop
operations.

One use case is keeping track of a pool of free elements, such as
network ports in a SNAT.
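
As an illustration only (not part of this patch), a tc program could
draw ports from a BPF_MAP_TYPE_QUEUE with the new helpers. This is a
rough sketch assuming the usual samples/bpf conventions (struct
bpf_map_def, SEC() annotations and helper declarations generated from
the FN() list):

	struct bpf_map_def SEC("maps") free_ports = {
		.type        = BPF_MAP_TYPE_QUEUE,
		.key_size    = 0,	/* queue/stack maps have no key */
		.value_size  = sizeof(__u16),
		.max_entries = 1024,
	};

	SEC("classifier")
	int snat_egress(struct __sk_buff *skb)
	{
		/* take a free port from the pool, NULL if it is empty */
		__u16 *port = bpf_map_pop_elem(&free_ports);

		if (!port)
			return TC_ACT_SHOT;
		/* ... rewrite the packet source port to *port ... */
		return TC_ACT_OK;
	}

From user space the pool can be refilled with BPF_MAP_UPDATE_ELEM
(NULL key) and drained with the new BPF_MAP_LOOKUP_AND_DELETE_ELEM
command, roughly:

	union bpf_attr attr = {};
	__u16 port;

	attr.map_fd = map_fd;	/* fd from BPF_MAP_CREATE */
	attr.value  = (__u64)(unsigned long)&port;

	if (syscall(__NR_bpf, BPF_MAP_LOOKUP_AND_DELETE_ELEM,
		    &attr, sizeof(attr)) == 0)
		printf("popped port %u\n", port);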

Signed-off-by: Mauricio Vasquez B <mauricio.vasq...@polito.it>
---
 include/linux/bpf.h           |    8 +
 include/linux/bpf_types.h     |    2 
 include/uapi/linux/bpf.h      |   36 ++++
 kernel/bpf/Makefile           |    2 
 kernel/bpf/helpers.c          |   44 +++++
 kernel/bpf/queue_stack_maps.c |  353 +++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c          |   96 +++++++++++
 kernel/bpf/verifier.c         |    6 +
 net/core/filter.c             |    6 +
 9 files changed, 543 insertions(+), 10 deletions(-)
 create mode 100644 kernel/bpf/queue_stack_maps.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 523481a3471b..1d39b9096d9f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -39,6 +39,11 @@ struct bpf_map_ops {
        void *(*map_lookup_elem)(struct bpf_map *map, void *key);
        int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
        int (*map_delete_elem)(struct bpf_map *map, void *key);
+       void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
+
+       /* funcs callable from eBPF programs */
+       void *(*map_lookup_or_init_elem)(struct bpf_map *map, void *key,
+                                        void *value);
 
        /* funcs called by prog_array and perf_event_array map */
        void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -806,6 +811,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
 extern const struct bpf_func_proto bpf_map_delete_elem_proto;
+extern const struct bpf_func_proto bpf_map_push_elem_proto;
+extern const struct bpf_func_proto bpf_map_pop_elem_proto;
+extern const struct bpf_func_proto bpf_map_peek_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index cd26c090e7c0..8d955f11f1cd 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -67,3 +67,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
 #endif
 #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, queue_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 66917a4eba27..0a5b904ba42f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
        BPF_BTF_LOAD,
        BPF_BTF_GET_FD_BY_ID,
        BPF_TASK_FD_QUERY,
+       BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 };
 
 enum bpf_map_type {
@@ -127,6 +128,8 @@ enum bpf_map_type {
        BPF_MAP_TYPE_SOCKHASH,
        BPF_MAP_TYPE_CGROUP_STORAGE,
        BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+       BPF_MAP_TYPE_QUEUE,
+       BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -459,6 +462,28 @@ union bpf_attr {
  *     Return
  *             0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ *     Description
+ *             Push an element *value* into *map*. *flags* is one of:
+ *
+ *             **BPF_EXIST**
+ *             If the queue/stack is full, the oldest element is removed to
+ *             make room for it.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * void *bpf_map_pop_elem(struct bpf_map *map)
+ *     Description
+ *             Pop an element from *map*.
+ *     Return
+ *             Pointer to the element, or **NULL** if the queue/stack is empty.
+ *
+ * void *bpf_map_peek_elem(struct bpf_map *map)
+ *     Description
+ *             Return an element from *map* without removing it.
+ *     Return
+ *             Pointer to the element, or **NULL** if the queue/stack is empty.
+ *
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  *     Description
  *             For tracing programs, safely attempt to read *size* bytes from
@@ -786,14 +811,14 @@ union bpf_attr {
  *
  *                     int ret;
  *                     struct bpf_tunnel_key key = {};
- *                     
+ *
  *                     ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
  *                     if (ret < 0)
  *                             return TC_ACT_SHOT;     // drop packet
- *                     
+ *
  *                     if (key.remote_ipv4 != 0x0a000001)
  *                             return TC_ACT_SHOT;     // drop packet
- *                     
+ *
  *                     return TC_ACT_OK;               // accept packet
  *
  *             This interface can also be used with all encapsulation devices
@@ -2226,7 +2251,10 @@ union bpf_attr {
        FN(get_current_cgroup_id),      \
        FN(get_local_storage),          \
        FN(sk_select_reuseport),        \
-       FN(skb_ancestor_cgroup_id),
+       FN(skb_ancestor_cgroup_id),     \
+       FN(map_push_elem),              \
+       FN(map_pop_elem),               \
+       FN(map_peek_elem),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 0488b8258321..17afae9e65f3 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -3,7 +3,7 @@ obj-y := core.o
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
-obj-$(CONFIG_BPF_SYSCALL) += local_storage.o
+obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
 ifeq ($(CONFIG_NET),y)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1991466b8327..c1ff567b69f1 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -76,6 +76,50 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = {
        .arg2_type      = ARG_PTR_TO_MAP_KEY,
 };
 
+BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
+{
+       WARN_ON_ONCE(!rcu_read_lock_held());
+       return map->ops->map_update_elem(map, NULL, value, flags);
+}
+
+const struct bpf_func_proto bpf_map_push_elem_proto = {
+       .func           = bpf_map_push_elem,
+       .gpl_only       = false,
+       .pkt_access     = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_CONST_MAP_PTR,
+       .arg2_type      = ARG_PTR_TO_MAP_VALUE,
+       .arg3_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_1(bpf_map_pop_elem, struct bpf_map *, map)
+{
+       WARN_ON_ONCE(!rcu_read_lock_held());
+       return (unsigned long) map->ops->map_lookup_and_delete_elem(map, NULL);
+}
+
+const struct bpf_func_proto bpf_map_pop_elem_proto = {
+       .func           = bpf_map_pop_elem,
+       .gpl_only       = false,
+       .pkt_access     = true,
+       .ret_type       = RET_PTR_TO_MAP_VALUE_OR_NULL,
+       .arg1_type      = ARG_CONST_MAP_PTR,
+};
+
+BPF_CALL_1(bpf_map_peek_elem, struct bpf_map *, map)
+{
+       WARN_ON_ONCE(!rcu_read_lock_held());
+       return (unsigned long) map->ops->map_lookup_elem(map, NULL);
+}
+
+const struct bpf_func_proto bpf_map_peek_elem_proto = {
+       .func           = bpf_map_peek_elem,
+       .gpl_only       = false,
+       .pkt_access     = true,
+       .ret_type       = RET_PTR_TO_MAP_VALUE_OR_NULL,
+       .arg1_type      = ARG_CONST_MAP_PTR,
+};
+
 const struct bpf_func_proto bpf_get_prandom_u32_proto = {
        .func           = bpf_user_rnd_u32,
        .gpl_only       = false,
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
new file mode 100644
index 000000000000..97e652bf6df5
--- /dev/null
+++ b/kernel/bpf/queue_stack_maps.c
@@ -0,0 +1,353 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * queue_stack_maps.c: BPF queue and stack maps
+ *
+ * Copyright (c) 2018 Politecnico di Torino
+ */
+#include <linux/bpf.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include "percpu_freelist.h"
+
+#define QUEUE_STACK_CREATE_FLAG_MASK \
+       (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
+enum queue_type {
+       QUEUE,
+       STACK,
+};
+
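+/* Elements are kept in a linked list anchored at 'head' and protected
+ * by 'lock': a queue pushes at the tail and pops from the head (FIFO),
+ * a stack pushes and pops at the head (LIFO).  Preallocated maps take
+ * nodes from 'freelist'; otherwise 'count' bounds kmalloc'ed nodes.
+ */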
+struct bpf_queue {
+       struct bpf_map map;
+       struct list_head head;
+       struct pcpu_freelist freelist;
+       void *nodes;
+       enum queue_type type;
+       raw_spinlock_t lock;
+       atomic_t count;
+       u32 node_size;
+};
+
+struct queue_node {
+       struct pcpu_freelist_node fnode;
+       struct bpf_queue *queue;
+       struct list_head list;
+       struct rcu_head rcu;
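+       /* the map value is copied here, see node_size */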
+       char element[0] __aligned(8);
+};
+
+static bool queue_map_is_prealloc(struct bpf_queue *queue)
+{
+       return !(queue->map.map_flags & BPF_F_NO_PREALLOC);
+}
+
+/* Called from syscall */
+static int queue_map_alloc_check(union bpf_attr *attr)
+{
+       /* check sanity of attributes */
+       if (attr->max_entries == 0 || attr->key_size != 0 ||
+           attr->value_size == 0 ||
+           attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
+               return -EINVAL;
+
+       if (attr->value_size > KMALLOC_MAX_SIZE)
+               /* if value_size is bigger, the user space won't be able to
+                * access the elements.
+                */
+               return -E2BIG;
+
+       return 0;
+}
+
+static int prealloc_init(struct bpf_queue *queue)
+{
+       u32 node_size = sizeof(struct queue_node) +
+                       round_up(queue->map.value_size, 8);
+       u32 num_entries = queue->map.max_entries;
+       int err;
+
+       queue->nodes = bpf_map_area_alloc(node_size * num_entries,
+                                         queue->map.numa_node);
+       if (!queue->nodes)
+               return -ENOMEM;
+
+       err = pcpu_freelist_init(&queue->freelist);
+       if (err)
+               goto free_nodes;
+
+       pcpu_freelist_populate(&queue->freelist,
+                              queue->nodes +
+                              offsetof(struct queue_node, fnode),
+                              node_size, num_entries);
+
+       return 0;
+
+free_nodes:
+       bpf_map_area_free(queue->nodes);
+       return err;
+}
+
+static void prealloc_destroy(struct bpf_queue *queue)
+{
+       bpf_map_area_free(queue->nodes);
+       pcpu_freelist_destroy(&queue->freelist);
+}
+
+static struct bpf_map *queue_map_alloc(union bpf_attr *attr)
+{
+       struct bpf_queue *queue;
+       u64 cost = sizeof(*queue);
+       int ret;
+
+       queue = kzalloc(sizeof(*queue), GFP_USER);
+       if (!queue)
+               return ERR_PTR(-ENOMEM);
+
+       bpf_map_init_from_attr(&queue->map, attr);
+
+       queue->node_size = sizeof(struct queue_node) +
+                          round_up(attr->value_size, 8);
+       cost += (u64) attr->max_entries * queue->node_size;
+       if (cost >= U32_MAX - PAGE_SIZE) {
+               ret = -E2BIG;
+               goto free_queue;
+       }
+
+       queue->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+       ret = bpf_map_precharge_memlock(queue->map.pages);
+       if (ret)
+               goto free_queue;
+
+       INIT_LIST_HEAD(&queue->head);
+
+       raw_spin_lock_init(&queue->lock);
+
+       if (queue->map.map_type == BPF_MAP_TYPE_QUEUE)
+               queue->type = QUEUE;
+       else if (queue->map.map_type == BPF_MAP_TYPE_STACK)
+               queue->type = STACK;
+
+       if (queue_map_is_prealloc(queue)) {
+               ret = prealloc_init(queue);
+               if (ret)
+                       goto free_queue;
+       }
+
+       return &queue->map;
+
+free_queue:
+       kfree(queue);
+       return ERR_PTR(ret);
+}
+
+/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
+static void queue_map_free(struct bpf_map *map)
+{
+       struct bpf_queue *queue = container_of(map, struct bpf_queue, map);
+       struct queue_node *l, *n;
+
+       /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+        * so the programs (can be more than one that used this map) were
+        * disconnected from events. Wait for outstanding critical sections in
+        * these programs to complete
+        */
+       synchronize_rcu();
+
+       /* some of queue_elem_free_rcu() callbacks for elements of this map may
+        * not have executed. Wait for them.
+        */
+       rcu_barrier();
+       if (!queue_map_is_prealloc(queue)) {
+               list_for_each_entry_safe(l, n, &queue->head, list) {
+                       list_del(&l->list);
+                       kfree(l);
+               }
+       } else {
+               prealloc_destroy(queue);
+       }
+       kfree(queue);
+}
+
+static void queue_elem_free_rcu(struct rcu_head *head)
+{
+       struct queue_node *l = container_of(head, struct queue_node, rcu);
+       struct bpf_queue *queue = l->queue;
+
+       /* must increment bpf_prog_active to avoid kprobe+bpf triggering while
+        * we're calling kfree, otherwise deadlock is possible if kprobes
+        * are placed somewhere inside of slub
+        */
+       preempt_disable();
+       __this_cpu_inc(bpf_prog_active);
+       if (queue_map_is_prealloc(queue))
+               pcpu_freelist_push(&queue->freelist, &l->fnode);
+       else
+               kfree(l);
+       __this_cpu_dec(bpf_prog_active);
+       preempt_enable();
+}
+
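+/* Common helper for peek (delete == false) and pop (delete == true).
+ * For both map types the element to return sits at the head of the
+ * list; only pop unlinks it and defers freeing to an RCU callback.
+ */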
+static void *__queue_map_lookup(struct bpf_map *map, bool delete)
+{
+       struct bpf_queue *queue = container_of(map, struct bpf_queue, map);
+       unsigned long flags;
+       struct queue_node *node;
+
+       raw_spin_lock_irqsave(&queue->lock, flags);
+
+       if (list_empty(&queue->head)) {
+               raw_spin_unlock_irqrestore(&queue->lock, flags);
+               return NULL;
+       }
+
+       node = list_first_entry(&queue->head, struct queue_node, list);
+
+       if (delete) {
+               if (!queue_map_is_prealloc(queue))
+                       atomic_dec(&queue->count);
+
+               list_del(&node->list);
+               call_rcu(&node->rcu, queue_elem_free_rcu);
+       }
+
+       raw_spin_unlock_irqrestore(&queue->lock, flags);
+       return &node->element;
+}
+
+/* Called from syscall or from eBPF program */
+static void *queue_map_lookup_elem(struct bpf_map *map, void *key)
+{
+       return __queue_map_lookup(map, false);
+}
+
+/* Called from syscall or from eBPF program */
+static void *queue_map_lookup_and_delete_elem(struct bpf_map *map, void *key)
+{
+       return __queue_map_lookup(map, true);
+}
+
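+/* Remove the element that has been in the map the longest: the head of
+ * the list for a queue, the tail for a stack (stacks push at the head).
+ * Used by push with BPF_EXIST to make room when the map is full.
+ */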
+static struct queue_node *queue_map_delete_oldest_node(struct bpf_queue *queue)
+{
+       struct queue_node *node = NULL;
+       unsigned long irq_flags;
+
+       raw_spin_lock_irqsave(&queue->lock, irq_flags);
+
+       if (list_empty(&queue->head))
+               goto out;
+
+       switch (queue->type) {
+       case QUEUE:
+               node = list_first_entry(&queue->head, struct queue_node, list);
+               break;
+       case STACK:
+               node = list_last_entry(&queue->head, struct queue_node, list);
+               break;
+       default:
+               goto out;
+       }
+
+       list_del(&node->list);
+out:
+       raw_spin_unlock_irqrestore(&queue->lock, irq_flags);
+       return node;
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_map_update_elem(struct bpf_map *map, void *key, void *value,
+                                u64 flags)
+{
+       struct bpf_queue *queue = container_of(map, struct bpf_queue, map);
+       unsigned long irq_flags;
+       struct queue_node *new;
+       /* BPF_EXIST is used to force making room for a new element in case the
+        * map is full
+        */
+       bool replace = (flags & BPF_EXIST);
+
+       /* Check supported flags for queue and stack maps */
+       if (flags & BPF_NOEXIST || flags > BPF_EXIST)
+               return -EINVAL;
+
+again:
+       if (!queue_map_is_prealloc(queue)) {
+               if (atomic_inc_return(&queue->count) > queue->map.max_entries) {
+                       atomic_dec(&queue->count);
+                       if (!replace)
+                               return -E2BIG;
+                       new = queue_map_delete_oldest_node(queue);
+                       /* It is possible that in the meanwhile the queue/stack
+                        * became empty and there was not an 'oldest' element
+                        * to delete.  In that case, try again
+                        */
+                       if (!new)
+                               goto again;
+               } else {
+                       new = kmalloc_node(queue->node_size,
+                                          GFP_ATOMIC | __GFP_NOWARN,
+                                          queue->map.numa_node);
+                       if (!new) {
+                               atomic_dec(&queue->count);
+                               return -ENOMEM;
+                       }
+               }
+       } else {
+               struct pcpu_freelist_node *l;
+
+               l = pcpu_freelist_pop(&queue->freelist);
+               if (!l) {
+                       if (!replace)
+                               return -E2BIG;
+                       new = queue_map_delete_oldest_node(queue);
+                       /* TODO: This should goto again, but this causes an
+                        * infinite loop when the elements are not being
+                        * returned to the free list by the
+                        * queue_elem_free_rcu() callback
+                        */
+                       if (!new)
+                               return -ENOMEM;
+               } else {
+                       new = container_of(l, struct queue_node, fnode);
+               }
+       }
+
+       memcpy(new->element, value, queue->map.value_size);
+       new->queue = queue;
+
+       raw_spin_lock_irqsave(&queue->lock, irq_flags);
+       switch (queue->type) {
+       case QUEUE:
+               list_add_tail(&new->list, &queue->head);
+               break;
+
+       case STACK:
+               list_add(&new->list, &queue->head);
+               break;
+       }
+       raw_spin_unlock_irqrestore(&queue->lock, irq_flags);
+
+       return 0;
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_map_delete_elem(struct bpf_map *map, void *key)
+{
+       return -EINVAL;
+}
+
+/* Called from syscall */
+static int queue_map_get_next_key(struct bpf_map *map, void *key,
+                                 void *next_key)
+{
+       return -EINVAL;
+}
+
+const struct bpf_map_ops queue_map_ops = {
+       .map_alloc_check = queue_map_alloc_check,
+       .map_alloc = queue_map_alloc,
+       .map_free = queue_map_free,
+       .map_lookup_elem = queue_map_lookup_elem,
+       .map_lookup_and_delete_elem = queue_map_lookup_and_delete_elem,
+       .map_update_elem = queue_map_update_elem,
+       .map_delete_elem = queue_map_delete_elem,
+       .map_get_next_key = queue_map_get_next_key,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8339d81cba1d..7703795cb4e4 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -652,6 +652,17 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
        return -ENOTSUPP;
 }
 
+static void *__bpf_copy_key(void *ukey, u64 key_size)
+{
+       if (key_size)
+               return memdup_user(ukey, key_size);
+
+       if (ukey)
+               return ERR_PTR(-EINVAL);
+
+       return NULL;
+}
+
 /* last field in 'union bpf_attr' used by this command */
 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
 
@@ -679,7 +690,7 @@ static int map_lookup_elem(union bpf_attr *attr)
                goto err_put;
        }
 
-       key = memdup_user(ukey, map->key_size);
+       key = __bpf_copy_key(ukey, map->key_size);
        if (IS_ERR(key)) {
                err = PTR_ERR(key);
                goto err_put;
@@ -767,7 +778,7 @@ static int map_update_elem(union bpf_attr *attr)
                goto err_put;
        }
 
-       key = memdup_user(ukey, map->key_size);
+       key = __bpf_copy_key(ukey, map->key_size);
        if (IS_ERR(key)) {
                err = PTR_ERR(key);
                goto err_put;
@@ -865,7 +876,7 @@ static int map_delete_elem(union bpf_attr *attr)
                goto err_put;
        }
 
-       key = memdup_user(ukey, map->key_size);
+       key = __bpf_copy_key(ukey, map->key_size);
        if (IS_ERR(key)) {
                err = PTR_ERR(key);
                goto err_put;
@@ -917,7 +928,7 @@ static int map_get_next_key(union bpf_attr *attr)
        }
 
        if (ukey) {
-               key = memdup_user(ukey, map->key_size);
+               key = __bpf_copy_key(ukey, map->key_size);
                if (IS_ERR(key)) {
                        err = PTR_ERR(key);
                        goto err_put;
@@ -958,6 +969,80 @@ static int map_get_next_key(union bpf_attr *attr)
        return err;
 }
 
+#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
+
+static int map_lookup_and_delete_elem(union bpf_attr *attr)
+{
+       void __user *ukey = u64_to_user_ptr(attr->key);
+       void __user *uvalue = u64_to_user_ptr(attr->value);
+       int ufd = attr->map_fd;
+       struct bpf_map *map;
+       void *key, *value, *ptr;
+       u32 value_size;
+       struct fd f;
+       int err;
+
+       if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
+               return -EINVAL;
+
+       f = fdget(ufd);
+       map = __bpf_map_get(f);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
+
+       if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+               err = -EPERM;
+               goto err_put;
+       }
+
+       key = __bpf_copy_key(ukey, map->key_size);
+       if (IS_ERR(key)) {
+               err = PTR_ERR(key);
+               goto err_put;
+       }
+
+       value_size = map->value_size;
+
+       err = -ENOMEM;
+       value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+       if (!value)
+               goto free_key;
+
+       err = -EFAULT;
+       if (copy_from_user(value, uvalue, value_size) != 0)
+               goto free_value;
+
+       /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
+        * inside bpf map update or delete otherwise deadlocks are possible
+        */
+       preempt_disable();
+       __this_cpu_inc(bpf_prog_active);
+       rcu_read_lock();
+       ptr = map->ops->map_lookup_and_delete_elem(map, key);
+       if (ptr)
+               memcpy(value, ptr, value_size);
+       rcu_read_unlock();
+       err = ptr ? 0 : -ENOENT;
+       __this_cpu_dec(bpf_prog_active);
+       preempt_enable();
+
+       if (err)
+               goto free_value;
+
+       if (copy_to_user(uvalue, value, value_size) != 0)
+               goto free_value;
+
+       err = 0;
+
+free_value:
+       kfree(value);
+free_key:
+       kfree(key);
+err_put:
+       fdput(f);
+       return err;
+}
+
 static const struct bpf_prog_ops * const bpf_prog_types[] = {
 #define BPF_PROG_TYPE(_id, _name) \
        [_id] = & _name ## _prog_ops,
@@ -2418,6 +2503,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
        case BPF_TASK_FD_QUERY:
                err = bpf_task_fd_query(&attr, uattr);
                break;
+       case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
+               err = map_lookup_and_delete_elem(&attr);
+               break;
        default:
                err = -EINVAL;
                break;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ca90679a7fe5..5bd67feb2f07 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2035,6 +2035,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
                        verbose(env, "invalid map_ptr to access map->key\n");
                        return -EACCES;
                }
+
                err = check_helper_mem_access(env, regno,
                                              meta->map_ptr->key_size, false,
                                              NULL);
@@ -2454,7 +2455,10 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
        if (func_id != BPF_FUNC_tail_call &&
            func_id != BPF_FUNC_map_lookup_elem &&
            func_id != BPF_FUNC_map_update_elem &&
-           func_id != BPF_FUNC_map_delete_elem)
+           func_id != BPF_FUNC_map_delete_elem &&
+           func_id != BPF_FUNC_map_push_elem &&
+           func_id != BPF_FUNC_map_pop_elem &&
+           func_id != BPF_FUNC_map_peek_elem)
                return 0;
 
        if (meta->map_ptr == NULL) {
diff --git a/net/core/filter.c b/net/core/filter.c
index fd423ce3da34..e7860914ffc7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4830,6 +4830,12 @@ bpf_base_func_proto(enum bpf_func_id func_id)
                return &bpf_map_update_elem_proto;
        case BPF_FUNC_map_delete_elem:
                return &bpf_map_delete_elem_proto;
+       case BPF_FUNC_map_push_elem:
+               return &bpf_map_push_elem_proto;
+       case BPF_FUNC_map_pop_elem:
+               return &bpf_map_pop_elem_proto;
+       case BPF_FUNC_map_peek_elem:
+               return &bpf_map_peek_elem_proto;
        case BPF_FUNC_get_prandom_u32:
                return &bpf_get_prandom_u32_proto;
        case BPF_FUNC_get_smp_processor_id:
