[PATCH bpf-next v3 7/7] selftests/bpf: add test cases for queue and stack maps

2018-10-18 Thread Mauricio Vasquez B
test_maps:
Tests that queue/stack maps are behaving correctly even in corner cases

test_progs:
Tests new ebpf helpers

Signed-off-by: Mauricio Vasquez B 
---
 tools/lib/bpf/bpf.c|   12 ++
 tools/lib/bpf/bpf.h|2 
 tools/testing/selftests/bpf/Makefile   |5 +
 tools/testing/selftests/bpf/bpf_helpers.h  |7 +
 tools/testing/selftests/bpf/test_maps.c|  122 
 tools/testing/selftests/bpf/test_progs.c   |   99 
 tools/testing/selftests/bpf/test_queue_map.c   |4 +
 tools/testing/selftests/bpf/test_queue_stack_map.h |   59 ++
 tools/testing/selftests/bpf/test_stack_map.c   |4 +
 9 files changed, 313 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
 create mode 100644 tools/testing/selftests/bpf/test_stack_map.c

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index d70a255cb05e..03f9bcc4ef50 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -278,6 +278,18 @@ int bpf_map_lookup_elem(int fd, const void *key, void 
*value)
return sys_bpf(BPF_MAP_LOOKUP_ELEM, , sizeof(attr));
 }
 
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
+{
+   union bpf_attr attr;
+
+   bzero(, sizeof(attr));
+   attr.map_fd = fd;
+   attr.key = ptr_to_u64(key);
+   attr.value = ptr_to_u64(value);
+
+   return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, , sizeof(attr));
+}
+
 int bpf_map_delete_elem(int fd, const void *key)
 {
union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 258c3c178333..26a51538213c 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -99,6 +99,8 @@ LIBBPF_API int bpf_map_update_elem(int fd, const void *key, 
const void *value,
   __u64 flags);
 
 LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value);
+LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
+ void *value);
 LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
 LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index d99dd6fc3fbe..e39dfb4e7970 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -37,7 +37,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o 
test_tcp_estats.o test
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o 
test_lirc_mode2_kern.o \
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \
-   test_sk_lookup_kern.o test_xdp_vlan.o
+   test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -118,6 +118,9 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
 $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
 $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
 
+$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h
+$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h
+
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 
'usage.*llvm')
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h 
b/tools/testing/selftests/bpf/bpf_helpers.h
index fda8c162d0df..6407a3df0f3b 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -16,6 +16,13 @@ static int (*bpf_map_update_elem)(void *map, void *key, void 
*value,
(void *) BPF_FUNC_map_update_elem;
 static int (*bpf_map_delete_elem)(void *map, void *key) =
(void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_map_push_elem)(void *map, void *value,
+   unsigned long long flags) =
+   (void *) BPF_FUNC_map_push_elem;
+static int (*bpf_map_pop_elem)(void *map, void *value) =
+   (void *) BPF_FUNC_map_pop_elem;
+static int (*bpf_map_peek_elem)(void *map, void *value) =
+   (void *) BPF_FUNC_map_peek_elem;
 static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
(void *) BPF_FUNC_probe_read;
 static unsigned long long (*bpf_ktime_get_ns)(void) =
diff --git a/tools/testing/selftests/bpf/test_maps.c 
b/tools/testing/selftests/bpf/test_maps.c
index 9b552c0fc47d..4db2116e52be 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 

[PATCH bpf-next v3 4/7] bpf: add queue and stack maps

2018-10-18 Thread Mauricio Vasquez B
Queue/stack maps implement a FIFO/LIFO data storage for ebpf programs.
These maps support peek, pop and push operations that are exposed to eBPF
programs through the new bpf_map[peek/pop/push] helpers.  Those operations
are exposed to userspace applications through the already existing
syscalls in the following way:

BPF_MAP_LOOKUP_ELEM-> peek
BPF_MAP_LOOKUP_AND_DELETE_ELEM -> pop
BPF_MAP_UPDATE_ELEM-> push

Queue/stack maps are implemented using a buffer, tail and head indexes,
hence BPF_F_NO_PREALLOC is not supported.

As opposed to other maps, queue and stack do not use RCU for protecting
maps values, the bpf_map[peek/pop] have an ARG_PTR_TO_UNINIT_MAP_VALUE
argument that is a pointer to a memory zone where to save the value of a
map.  Basically the same as ARG_PTR_TO_UNINIT_MEM, but the size does not
need to be passed as an extra argument.

Our main motivation for implementing queue/stack maps was to keep track
of a pool of elements, like network ports in a SNAT, however we foresee
other use cases, like for example saving the last N kernel events in a map
and then analysing them from userspace.

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf.h   |6 +
 include/linux/bpf_types.h |2 
 include/uapi/linux/bpf.h  |   29 
 kernel/bpf/Makefile   |2 
 kernel/bpf/core.c |3 
 kernel/bpf/helpers.c  |   43 ++
 kernel/bpf/queue_stack_maps.c |  288 +
 kernel/bpf/syscall.c  |6 +
 kernel/bpf/verifier.c |   19 +++
 net/core/filter.c |6 +
 10 files changed, 401 insertions(+), 3 deletions(-)
 create mode 100644 kernel/bpf/queue_stack_maps.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0f8b863e0229..33014ae73103 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -39,6 +39,9 @@ struct bpf_map_ops {
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 
flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
+   int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
+   int (*map_pop_elem)(struct bpf_map *map, void *value);
+   int (*map_peek_elem)(struct bpf_map *map, void *value);
 
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -811,6 +814,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct 
bpf_map *map,
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
 extern const struct bpf_func_proto bpf_map_delete_elem_proto;
+extern const struct bpf_func_proto bpf_map_push_elem_proto;
+extern const struct bpf_func_proto bpf_map_pop_elem_proto;
+extern const struct bpf_func_proto bpf_map_peek_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 7bad4e1947ed..44d9ab4809bd 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
 #endif
 #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f9187b41dff6..b8fc161c5b78 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -128,6 +128,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+   BPF_MAP_TYPE_QUEUE,
+   BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -462,6 +464,28 @@ union bpf_attr {
  * Return
  * 0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * Description
  * For tracing programs, safel

[PATCH bpf-next v3 2/7] bpf/syscall: allow key to be null in map functions

2018-10-18 Thread Mauricio Vasquez B
This commit adds the required logic to allow the key to be NULL
in case the key_size of the map is 0.

A new __bpf_copy_key helper function only copies the key from
userspace when key_size != 0; otherwise it enforces that the key must be
null.

Signed-off-by: Mauricio Vasquez B 
Acked-by: Song Liu 
---
 kernel/bpf/syscall.c |   19 +++
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f4ecd6ed2252..78d9dd95e25f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -651,6 +651,17 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void 
*key, void *value)
return -ENOTSUPP;
 }
 
+static void *__bpf_copy_key(void __user *ukey, u64 key_size)
+{
+   if (key_size)
+   return memdup_user(ukey, key_size);
+
+   if (ukey)
+   return ERR_PTR(-EINVAL);
+
+   return NULL;
+}
+
 /* last field in 'union bpf_attr' used by this command */
 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
 
@@ -678,7 +689,7 @@ static int map_lookup_elem(union bpf_attr *attr)
goto err_put;
}
 
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -785,7 +796,7 @@ static int map_update_elem(union bpf_attr *attr)
goto err_put;
}
 
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -888,7 +899,7 @@ static int map_delete_elem(union bpf_attr *attr)
goto err_put;
}
 
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -941,7 +952,7 @@ static int map_get_next_key(union bpf_attr *attr)
}
 
if (ukey) {
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;



[PATCH bpf-next v3 1/7] bpf: rename stack trace map operations

2018-10-18 Thread Mauricio Vasquez B
In the following patches queue and stack maps (FIFO and LIFO
datastructures) will be implemented.  In order to avoid confusion and
a possible name clash rename stack_map_ops to stack_trace_map_ops

Signed-off-by: Mauricio Vasquez B 
Acked-by: Song Liu 
---
 include/linux/bpf_types.h |2 +-
 kernel/bpf/stackmap.c |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index fa48343a5ea1..7bad4e1947ed 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -51,7 +51,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops)
 #ifdef CONFIG_PERF_EVENTS
-BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index b2ade10f7ec3..90daf285de03 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -600,7 +600,7 @@ static void stack_map_free(struct bpf_map *map)
put_callchain_buffers();
 }
 
-const struct bpf_map_ops stack_map_ops = {
+const struct bpf_map_ops stack_trace_map_ops = {
.map_alloc = stack_map_alloc,
.map_free = stack_map_free,
.map_get_next_key = stack_map_get_next_key,



[PATCH bpf-next v3 5/7] bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall

2018-10-18 Thread Mauricio Vasquez B
The previous patch implemented bpf queue/stack maps that
provided the peek/pop/push functions.  There is not a direct
relationship between those functions and the current map
syscalls, hence a new MAP_LOOKUP_AND_DELETE_ELEM syscall is added;
this is mapped to the pop operation in the queue/stack maps
and it is still to be implemented in other kinds of maps.

Signed-off-by: Mauricio Vasquez B 
---
 include/uapi/linux/bpf.h |1 +
 kernel/bpf/syscall.c |   66 ++
 2 files changed, 67 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b8fc161c5b78..c8824d5364ff 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+   BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 };
 
 enum bpf_map_type {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1617407f9ee5..49ae64a26562 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -999,6 +999,69 @@ static int map_get_next_key(union bpf_attr *attr)
return err;
 }
 
+#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
+
+static int map_lookup_and_delete_elem(union bpf_attr *attr)
+{
+   void __user *ukey = u64_to_user_ptr(attr->key);
+   void __user *uvalue = u64_to_user_ptr(attr->value);
+   int ufd = attr->map_fd;
+   struct bpf_map *map;
+   void *key, *value, *ptr;
+   u32 value_size;
+   struct fd f;
+   int err;
+
+   if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
+   return -EINVAL;
+
+   f = fdget(ufd);
+   map = __bpf_map_get(f);
+   if (IS_ERR(map))
+   return PTR_ERR(map);
+
+   if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+   err = -EPERM;
+   goto err_put;
+   }
+
+   key = __bpf_copy_key(ukey, map->key_size);
+   if (IS_ERR(key)) {
+   err = PTR_ERR(key);
+   goto err_put;
+   }
+
+   value_size = map->value_size;
+
+   err = -ENOMEM;
+   value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+   if (!value)
+   goto free_key;
+
+   if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+   map->map_type == BPF_MAP_TYPE_STACK) {
+   err = map->ops->map_pop_elem(map, value);
+   } else {
+   err = -ENOTSUPP;
+   }
+
+   if (err)
+   goto free_value;
+
+   if (copy_to_user(uvalue, value, value_size) != 0)
+   goto free_value;
+
+   err = 0;
+
+free_value:
+   kfree(value);
+free_key:
+   kfree(key);
+err_put:
+   fdput(f);
+   return err;
+}
+
 static const struct bpf_prog_ops * const bpf_prog_types[] = {
 #define BPF_PROG_TYPE(_id, _name) \
[_id] = & _name ## _prog_ops,
@@ -2472,6 +2535,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, 
uattr, unsigned int, siz
case BPF_TASK_FD_QUERY:
err = bpf_task_fd_query(, uattr);
break;
+   case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
+   err = map_lookup_and_delete_elem();
+   break;
default:
err = -EINVAL;
break;



[PATCH bpf-next v3 6/7] Sync uapi/bpf.h to tools/include

2018-10-18 Thread Mauricio Vasquez B
Sync both files.

Signed-off-by: Mauricio Vasquez B 
Acked-by: Song Liu 
---
 tools/include/uapi/linux/bpf.h |   30 +-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f9187b41dff6..c8824d5364ff 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+   BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 };
 
 enum bpf_map_type {
@@ -128,6 +129,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+   BPF_MAP_TYPE_QUEUE,
+   BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -462,6 +465,28 @@ union bpf_attr {
  * Return
  * 0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * Description
  * For tracing programs, safely attempt to read *size* bytes from
@@ -2303,7 +2328,10 @@ union bpf_attr {
FN(skb_ancestor_cgroup_id), \
FN(sk_lookup_tcp),  \
FN(sk_lookup_udp),  \
-   FN(sk_release),
+   FN(sk_release), \
+   FN(map_push_elem),  \
+   FN(map_pop_elem),   \
+   FN(map_peek_elem),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call



[PATCH bpf-next v3 3/7] bpf/verifier: add ARG_PTR_TO_UNINIT_MAP_VALUE

2018-10-18 Thread Mauricio Vasquez B
ARG_PTR_TO_UNINIT_MAP_VALUE argument is a pointer to a memory zone
used to save the value of a map.  Basically the same as
ARG_PTR_TO_UNINIT_MEM, but the size does not need to be passed as an
extra argument.

This will be used in the following patch that implements some new
helpers that receive a pointer to be filled with a map value.

Signed-off-by: Mauricio Vasquez B 
Acked-by: Song Liu 
---
 include/linux/bpf.h   |1 +
 kernel/bpf/verifier.c |9 ++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e60fff48288b..0f8b863e0229 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -138,6 +138,7 @@ enum bpf_arg_type {
ARG_CONST_MAP_PTR,  /* const argument used as pointer to bpf_map */
ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
ARG_PTR_TO_MAP_VALUE,   /* pointer to stack used as map value */
+   ARG_PTR_TO_UNINIT_MAP_VALUE,/* pointer to valid memory used to 
store a map value */
 
/* the following constraints used to prototype bpf_memcmp() and other
 * functions that access data on eBPF program stack
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3f93a548a642..d84c91ac3b70 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2117,7 +2117,8 @@ static int check_func_arg(struct bpf_verifier_env *env, 
u32 regno,
}
 
if (arg_type == ARG_PTR_TO_MAP_KEY ||
-   arg_type == ARG_PTR_TO_MAP_VALUE) {
+   arg_type == ARG_PTR_TO_MAP_VALUE ||
+   arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
expected_type = PTR_TO_STACK;
if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
type != expected_type)
@@ -2187,7 +2188,8 @@ static int check_func_arg(struct bpf_verifier_env *env, 
u32 regno,
err = check_helper_mem_access(env, regno,
  meta->map_ptr->key_size, false,
  NULL);
-   } else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
+   } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
+  arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
/* bpf_map_xxx(..., map_ptr, ..., value) call:
 * check [value, value + map->value_size) validity
 */
@@ -2196,9 +2198,10 @@ static int check_func_arg(struct bpf_verifier_env *env, 
u32 regno,
verbose(env, "invalid map_ptr to access map->value\n");
return -EACCES;
}
+   meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
err = check_helper_mem_access(env, regno,
  meta->map_ptr->value_size, false,
- NULL);
+ meta);
} else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
 



[PATCH bpf-next v3 0/7] Implement queue/stack maps

2018-10-18 Thread Mauricio Vasquez B
Some applications need to have a pool of free elements, for
example the list of free L4 ports in a SNAT.  None of the current maps allow
to do it as it is not possible to get any element without having the key
it is associated to; even if it were possible, the lack of locking mechanisms in
eBPF would make it almost impossible to implement without data races.

This patchset implements two new kind of eBPF maps: queue and stack.
Those maps provide to eBPF programs the peek, push and pop operations, and for
userspace applications a new bpf_map_lookup_and_delete_elem() is added.

Signed-off-by: Mauricio Vasquez B 

v2 -> v3:
 - Remove "almost dead code" in syscall.c
 - Remove unnecessary copy_from_user in bpf_map_lookup_and_delete_elem
 - Rebase

v1 -> v2:
 - Put ARG_PTR_TO_UNINIT_MAP_VALUE logic into a separated patch
 - Fix missing __this_cpu_dec & preempt_enable calls in kernel/bpf/syscall.c

RFC v4 -> v1:
 - Remove roundup to power of 2 in memory allocation
 - Remove count and use a free slot to check if queue/stack is empty
 - Use if + assigment for wrapping indexes
 - Fix some minor style issues
 - Squash two patches together

RFC v3 -> RFC v4:
 - Revert renaming of kernel/bpf/stackmap.c
 - Remove restriction on value size
 - Remove len arguments from peek/pop helpers
 - Add new ARG_PTR_TO_UNINIT_MAP_VALUE

RFC v2 -> RFC v3:
 - Return elements by value instead that by reference
 - Implement queue/stack base on array and head + tail indexes
 - Rename stack trace related files to avoid confusion and conflicts

RFC v1 -> RFC v2:
 - Create two separate maps instead of single one + flags
 - Implement bpf_map_lookup_and_delete syscall
 - Support peek operation
 - Define replacement policy through flags in the update() method
 - Add eBPF side tests

---

Mauricio Vasquez B (7):
  bpf: rename stack trace map operations
  bpf/syscall: allow key to be null in map functions
  bpf/verifier: add ARG_PTR_TO_UNINIT_MAP_VALUE
  bpf: add queue and stack maps
  bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall
  Sync uapi/bpf.h to tools/include
  selftests/bpf: add test cases for queue and stack maps


 include/linux/bpf.h|7 
 include/linux/bpf_types.h  |4 
 include/uapi/linux/bpf.h   |   30 ++
 kernel/bpf/Makefile|2 
 kernel/bpf/core.c  |3 
 kernel/bpf/helpers.c   |   43 +++
 kernel/bpf/queue_stack_maps.c  |  288 
 kernel/bpf/stackmap.c  |2 
 kernel/bpf/syscall.c   |   91 ++
 kernel/bpf/verifier.c  |   28 ++
 net/core/filter.c  |6 
 tools/include/uapi/linux/bpf.h |   30 ++
 tools/lib/bpf/bpf.c|   12 +
 tools/lib/bpf/bpf.h|2 
 tools/testing/selftests/bpf/Makefile   |5 
 tools/testing/selftests/bpf/bpf_helpers.h  |7 
 tools/testing/selftests/bpf/test_maps.c|  122 
 tools/testing/selftests/bpf/test_progs.c   |   99 +++
 tools/testing/selftests/bpf/test_queue_map.c   |4 
 tools/testing/selftests/bpf/test_queue_stack_map.h |   59 
 tools/testing/selftests/bpf/test_stack_map.c   |4 
 21 files changed, 834 insertions(+), 14 deletions(-)
 create mode 100644 kernel/bpf/queue_stack_maps.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
 create mode 100644 tools/testing/selftests/bpf/test_stack_map.c

--



[PATCH bpf-next v2 7/7] selftests/bpf: add test cases for queue and stack maps

2018-10-10 Thread Mauricio Vasquez B
test_maps:
Tests that queue/stack maps are behaving correctly even in corner cases

test_progs:
Tests new ebpf helpers

Signed-off-by: Mauricio Vasquez B 
---
 tools/lib/bpf/bpf.c|   12 ++
 tools/lib/bpf/bpf.h|1 
 tools/testing/selftests/bpf/Makefile   |5 +
 tools/testing/selftests/bpf/bpf_helpers.h  |7 +
 tools/testing/selftests/bpf/test_maps.c|  122 
 tools/testing/selftests/bpf/test_progs.c   |   99 
 tools/testing/selftests/bpf/test_queue_map.c   |4 +
 tools/testing/selftests/bpf/test_queue_stack_map.h |   59 ++
 tools/testing/selftests/bpf/test_stack_map.c   |4 +
 9 files changed, 312 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
 create mode 100644 tools/testing/selftests/bpf/test_stack_map.c

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index d70a255cb05e..ad2d41a6e3dd 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -278,6 +278,18 @@ int bpf_map_lookup_elem(int fd, const void *key, void 
*value)
return sys_bpf(BPF_MAP_LOOKUP_ELEM, , sizeof(attr));
 }
 
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, const void *value)
+{
+   union bpf_attr attr;
+
+   bzero(, sizeof(attr));
+   attr.map_fd = fd;
+   attr.key = ptr_to_u64(key);
+   attr.value = ptr_to_u64(value);
+
+   return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, , sizeof(attr));
+}
+
 int bpf_map_delete_elem(int fd, const void *key)
 {
union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 87520a87a75f..57497185afaa 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -86,6 +86,7 @@ int bpf_map_update_elem(int fd, const void *key, const void 
*value,
__u64 flags);
 
 int bpf_map_lookup_elem(int fd, const void *key, void *value);
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, const void *value);
 int bpf_map_delete_elem(int fd, const void *key);
 int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 int bpf_obj_pin(int fd, const char *pathname);
diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index d24afe8b821d..710fc1356c87 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -37,7 +37,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o 
test_tcp_estats.o test
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o 
test_lirc_mode2_kern.o \
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \
-   test_sk_lookup_kern.o test_xdp_vlan.o
+   test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -116,6 +116,9 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
 $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
 $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
 
+$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h
+$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h
+
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 
'usage.*llvm')
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h 
b/tools/testing/selftests/bpf/bpf_helpers.h
index fda8c162d0df..6407a3df0f3b 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -16,6 +16,13 @@ static int (*bpf_map_update_elem)(void *map, void *key, void 
*value,
(void *) BPF_FUNC_map_update_elem;
 static int (*bpf_map_delete_elem)(void *map, void *key) =
(void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_map_push_elem)(void *map, void *value,
+   unsigned long long flags) =
+   (void *) BPF_FUNC_map_push_elem;
+static int (*bpf_map_pop_elem)(void *map, void *value) =
+   (void *) BPF_FUNC_map_pop_elem;
+static int (*bpf_map_peek_elem)(void *map, void *value) =
+   (void *) BPF_FUNC_map_peek_elem;
 static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
(void *) BPF_FUNC_probe_read;
 static unsigned long long (*bpf_ktime_get_ns)(void) =
diff --git a/tools/testing/selftests/bpf/test_maps.c 
b/tools/testing/selftests/bpf/test_maps.c
index 9b552c0fc47d..4db2116e52be 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -471,6 +472,122 @@ static void test_devmap(int task, void *data)
close(f

[PATCH bpf-next v2 6/7] Sync uapi/bpf.h to tools/include

2018-10-10 Thread Mauricio Vasquez B
Sync both files.

Signed-off-by: Mauricio Vasquez B 
---
 tools/include/uapi/linux/bpf.h |   30 +-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f9187b41dff6..c8824d5364ff 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+   BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 };
 
 enum bpf_map_type {
@@ -128,6 +129,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+   BPF_MAP_TYPE_QUEUE,
+   BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -462,6 +465,28 @@ union bpf_attr {
  * Return
  * 0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * Description
  * For tracing programs, safely attempt to read *size* bytes from
@@ -2303,7 +2328,10 @@ union bpf_attr {
FN(skb_ancestor_cgroup_id), \
FN(sk_lookup_tcp),  \
FN(sk_lookup_udp),  \
-   FN(sk_release),
+   FN(sk_release), \
+   FN(map_push_elem),  \
+   FN(map_pop_elem),   \
+   FN(map_peek_elem),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call



[PATCH bpf-next v2 4/7] bpf/verifier: add ARG_PTR_TO_UNINIT_MAP_VALUE

2018-10-10 Thread Mauricio Vasquez B
ARG_PTR_TO_UNINIT_MAP_VALUE argument is a pointer to a memory zone
used to save the value of a map.  Basically the same as
ARG_PTR_TO_UNINIT_MEM, but the size does not need to be passed as an
extra argument.

This will be used in the following patch that implements some new
helpers that receive a pointer to be filled with a map value.

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf.h   |1 +
 kernel/bpf/verifier.c |9 ++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5793f0c7fbb5..e37b4986bb45 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -139,6 +139,7 @@ enum bpf_arg_type {
ARG_CONST_MAP_PTR,  /* const argument used as pointer to bpf_map */
ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
ARG_PTR_TO_MAP_VALUE,   /* pointer to stack used as map value */
+   ARG_PTR_TO_UNINIT_MAP_VALUE,/* pointer to valid memory used to 
store a map value */
 
/* the following constraints used to prototype bpf_memcmp() and other
 * functions that access data on eBPF program stack
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3f93a548a642..d84c91ac3b70 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2117,7 +2117,8 @@ static int check_func_arg(struct bpf_verifier_env *env, 
u32 regno,
}
 
if (arg_type == ARG_PTR_TO_MAP_KEY ||
-   arg_type == ARG_PTR_TO_MAP_VALUE) {
+   arg_type == ARG_PTR_TO_MAP_VALUE ||
+   arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
expected_type = PTR_TO_STACK;
if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
type != expected_type)
@@ -2187,7 +2188,8 @@ static int check_func_arg(struct bpf_verifier_env *env, 
u32 regno,
err = check_helper_mem_access(env, regno,
  meta->map_ptr->key_size, false,
  NULL);
-   } else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
+   } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
+  arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
/* bpf_map_xxx(..., map_ptr, ..., value) call:
 * check [value, value + map->value_size) validity
 */
@@ -2196,9 +2198,10 @@ static int check_func_arg(struct bpf_verifier_env *env, 
u32 regno,
verbose(env, "invalid map_ptr to access map->value\n");
return -EACCES;
}
+   meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
err = check_helper_mem_access(env, regno,
  meta->map_ptr->value_size, false,
- NULL);
+ meta);
} else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
 



[PATCH bpf-next v2 5/7] bpf: add queue and stack maps

2018-10-10 Thread Mauricio Vasquez B
Queue/stack maps implement a FIFO/LIFO data storage for ebpf programs.
These maps support peek, pop and push operations that are exposed to eBPF
programs through the new bpf_map[peek/pop/push] helpers.  Those operations
are exposed to userspace applications through the already existing
syscalls in the following way:

BPF_MAP_LOOKUP_ELEM-> peek
BPF_MAP_LOOKUP_AND_DELETE_ELEM -> pop
BPF_MAP_UPDATE_ELEM-> push

Queue/stack maps are implemented using a buffer, tail and head indexes,
hence BPF_F_NO_PREALLOC is not supported.

As opposed to other maps, queue and stack do not use RCU for protecting
map values; the bpf_map[peek/pop] helpers have an
ARG_PTR_TO_UNINIT_MAP_VALUE argument that is a pointer to a memory zone
where the value of a map is saved.  Basically the same as
ARG_PTR_TO_UNINIT_MEM, but the size does not need to be passed as an
extra argument.

Our main motivation for implementing queue/stack maps was to keep track
of a pool of elements, like network ports in a SNAT; however, we foresee
other use cases, like for example saving the last N kernel events in a
map and then analysing them from userspace.

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf.h   |6 +
 include/linux/bpf_types.h |2 
 include/uapi/linux/bpf.h  |   29 
 kernel/bpf/Makefile   |2 
 kernel/bpf/core.c |3 
 kernel/bpf/helpers.c  |   43 ++
 kernel/bpf/queue_stack_maps.c |  288 +
 kernel/bpf/syscall.c  |   11 +-
 kernel/bpf/verifier.c |   19 +++
 net/core/filter.c |6 +
 10 files changed, 405 insertions(+), 4 deletions(-)
 create mode 100644 kernel/bpf/queue_stack_maps.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e37b4986bb45..2c4854c2c2dc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -40,6 +40,9 @@ struct bpf_map_ops {
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 
flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
+   int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
+   int (*map_pop_elem)(struct bpf_map *map, void *value);
+   int (*map_peek_elem)(struct bpf_map *map, void *value);
 
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -827,6 +830,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct 
bpf_map *map,
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
 extern const struct bpf_func_proto bpf_map_delete_elem_proto;
+extern const struct bpf_func_proto bpf_map_push_elem_proto;
+extern const struct bpf_func_proto bpf_map_pop_elem_proto;
+extern const struct bpf_func_proto bpf_map_peek_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 658509daacd4..a2ec73aa1ec7 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
 #endif
 #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3bb94aa2d408..c8824d5364ff 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -129,6 +129,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+   BPF_MAP_TYPE_QUEUE,
+   BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -463,6 +465,28 @@ union bpf_attr {
  * Return
  * 0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * Description
  * For tracing program

[PATCH bpf-next v2 3/7] bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall

2018-10-10 Thread Mauricio Vasquez B
The following patch implements bpf queue/stack maps that
provide the peek/pop/push functions.  There is not a direct
relationship between those functions and the current map
syscalls, hence a new MAP_LOOKUP_AND_DELETE_ELEM syscall is added;
this is mapped to the pop operation in the queue/stack maps
and is still to be implemented in other kinds of maps.

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf.h  |1 +
 include/uapi/linux/bpf.h |1 +
 kernel/bpf/syscall.c |   82 ++
 3 files changed, 84 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9b558713447f..5793f0c7fbb5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -39,6 +39,7 @@ struct bpf_map_ops {
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 
flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
+   void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
 
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f9187b41dff6..3bb94aa2d408 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+   BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 };
 
 enum bpf_map_type {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f36c080ad356..6907d661dea5 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -980,6 +980,85 @@ static int map_get_next_key(union bpf_attr *attr)
return err;
 }
 
+#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
+
+static int map_lookup_and_delete_elem(union bpf_attr *attr)
+{
+   void __user *ukey = u64_to_user_ptr(attr->key);
+   void __user *uvalue = u64_to_user_ptr(attr->value);
+   int ufd = attr->map_fd;
+   struct bpf_map *map;
+   void *key, *value, *ptr;
+   u32 value_size;
+   struct fd f;
+   int err;
+
+   if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
+   return -EINVAL;
+
+   f = fdget(ufd);
+   map = __bpf_map_get(f);
+   if (IS_ERR(map))
+   return PTR_ERR(map);
+
+   if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+   err = -EPERM;
+   goto err_put;
+   }
+
+   key = __bpf_copy_key(ukey, map->key_size);
+   if (IS_ERR(key)) {
+   err = PTR_ERR(key);
+   goto err_put;
+   }
+
+   value_size = map->value_size;
+
+   err = -ENOMEM;
+   value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+   if (!value)
+   goto free_key;
+
+   err = -EFAULT;
+   if (copy_from_user(value, uvalue, value_size) != 0)
+   goto free_value;
+
+   /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
+* inside bpf map update or delete otherwise deadlocks are possible
+*/
+   preempt_disable();
+   __this_cpu_inc(bpf_prog_active);
+   if (map->ops->map_lookup_and_delete_elem) {
+   rcu_read_lock();
+   ptr = map->ops->map_lookup_and_delete_elem(map, key);
+   if (ptr)
+   memcpy(value, ptr, value_size);
+   rcu_read_unlock();
+   err = ptr ? 0 : -ENOENT;
+   } else {
+   err = -ENOTSUPP;
+   }
+
+   __this_cpu_dec(bpf_prog_active);
+   preempt_enable();
+
+   if (err)
+   goto free_value;
+
+   if (copy_to_user(uvalue, value, value_size) != 0)
+   goto free_value;
+
+   err = 0;
+
+free_value:
+   kfree(value);
+free_key:
+   kfree(key);
+err_put:
+   fdput(f);
+   return err;
+}
+
 static const struct bpf_prog_ops * const bpf_prog_types[] = {
 #define BPF_PROG_TYPE(_id, _name) \
[_id] = & _name ## _prog_ops,
@@ -2453,6 +2532,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, 
uattr, unsigned int, siz
case BPF_TASK_FD_QUERY:
err = bpf_task_fd_query(, uattr);
break;
+   case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
+   err = map_lookup_and_delete_elem();
+   break;
default:
err = -EINVAL;
break;



[PATCH bpf-next v2 2/7] bpf/syscall: allow key to be null in map functions

2018-10-10 Thread Mauricio Vasquez B
This commit adds the required logic to allow the key to be NULL
in case the key_size of the map is 0.

A new __bpf_copy_key helper function only copies the key from
userspace when key_size != 0; otherwise it enforces that the key must
be null.

Signed-off-by: Mauricio Vasquez B 
---
 kernel/bpf/syscall.c |   19 +++
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4f416234251f..f36c080ad356 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -651,6 +651,17 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void 
*key, void *value)
return -ENOTSUPP;
 }
 
+static void *__bpf_copy_key(void __user *ukey, u64 key_size)
+{
+   if (key_size)
+   return memdup_user(ukey, key_size);
+
+   if (ukey)
+   return ERR_PTR(-EINVAL);
+
+   return NULL;
+}
+
 /* last field in 'union bpf_attr' used by this command */
 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
 
@@ -678,7 +689,7 @@ static int map_lookup_elem(union bpf_attr *attr)
goto err_put;
}
 
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -774,7 +785,7 @@ static int map_update_elem(union bpf_attr *attr)
goto err_put;
}
 
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -876,7 +887,7 @@ static int map_delete_elem(union bpf_attr *attr)
goto err_put;
}
 
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -928,7 +939,7 @@ static int map_get_next_key(union bpf_attr *attr)
}
 
if (ukey) {
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;



[PATCH bpf-next v2 1/7] bpf: rename stack trace map operations

2018-10-10 Thread Mauricio Vasquez B
In the following patches queue and stack maps (FIFO and LIFO
datastructures) will be implemented.  In order to avoid confusion and
a possible name clash rename stack_map_ops to stack_trace_map_ops

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf_types.h |2 +-
 kernel/bpf/stackmap.c |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 5432f4c9f50e..658509daacd4 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -51,7 +51,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops)
 #ifdef CONFIG_PERF_EVENTS
-BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index b2ade10f7ec3..90daf285de03 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -600,7 +600,7 @@ static void stack_map_free(struct bpf_map *map)
put_callchain_buffers();
 }
 
-const struct bpf_map_ops stack_map_ops = {
+const struct bpf_map_ops stack_trace_map_ops = {
.map_alloc = stack_map_alloc,
.map_free = stack_map_free,
.map_get_next_key = stack_map_get_next_key,



[PATCH bpf-next v2 0/7] Implement queue/stack maps

2018-10-10 Thread Mauricio Vasquez B
In some applications it is necessary to have a pool of free elements, for
example the list of free L4 ports in a SNAT.  None of the current maps allows
this, as it is not possible to get any element without having the key
it is associated with; even if it were possible, the lack of locking mechanisms
in eBPF would make it almost impossible to implement without data races.

This patchset implements two new kind of eBPF maps: queue and stack.
Those maps provide to eBPF programs the peek, push and pop operations, and for
userspace applications a new bpf_map_lookup_and_delete_elem() is added.

Signed-off-by: Mauricio Vasquez B 

v1 -> v2:
 - Put ARG_PTR_TO_UNINIT_MAP_VALUE logic into a separated patch
 - Fix missing __this_cpu_dec & preempt_enable calls in kernel/bpf/syscall.c

RFC v4 -> v1:
 - Remove roundup to power of 2 in memory allocation
 - Remove count and use a free slot to check if queue/stack is empty
 - Use if + assigment for wrapping indexes
 - Fix some minor style issues
 - Squash two patches together

RFC v3 -> RFC v4:
 - Revert renaming of kernel/bpf/stackmap.c
 - Remove restriction on value size
 - Remove len arguments from peek/pop helpers
 - Add new ARG_PTR_TO_UNINIT_MAP_VALUE

RFC v2 -> RFC v3:
 - Return elements by value instead that by reference
 - Implement queue/stack base on array and head + tail indexes
 - Rename stack trace related files to avoid confusion and conflicts

RFC v1 -> RFC v2:
 - Create two separate maps instead of single one + flags
 - Implement bpf_map_lookup_and_delete syscall
 - Support peek operation
 - Define replacement policy through flags in the update() method
 - Add eBPF side tests

---

Mauricio Vasquez B (7):
  bpf: rename stack trace map operations
  bpf/syscall: allow key to be null in map functions
  bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall
  bpf/verifier: add ARG_PTR_TO_UNINIT_MAP_VALUE
  bpf: add queue and stack maps
  Sync uapi/bpf.h to tools/include
  selftests/bpf: add test cases for queue and stack maps


 include/linux/bpf.h|8 +
 include/linux/bpf_types.h  |4 
 include/uapi/linux/bpf.h   |   30 ++
 kernel/bpf/Makefile|2 
 kernel/bpf/core.c  |3 
 kernel/bpf/helpers.c   |   43 +++
 kernel/bpf/queue_stack_maps.c  |  288 
 kernel/bpf/stackmap.c  |2 
 kernel/bpf/syscall.c   |  110 +++-
 kernel/bpf/verifier.c  |   28 ++
 net/core/filter.c  |6 
 tools/include/uapi/linux/bpf.h |   30 ++
 tools/lib/bpf/bpf.c|   12 +
 tools/lib/bpf/bpf.h|1 
 tools/testing/selftests/bpf/Makefile   |5 
 tools/testing/selftests/bpf/bpf_helpers.h  |7 
 tools/testing/selftests/bpf/test_maps.c|  122 
 tools/testing/selftests/bpf/test_progs.c   |   99 +++
 tools/testing/selftests/bpf/test_queue_map.c   |4 
 tools/testing/selftests/bpf/test_queue_stack_map.h |   59 
 tools/testing/selftests/bpf/test_stack_map.c   |4 
 21 files changed, 853 insertions(+), 14 deletions(-)
 create mode 100644 kernel/bpf/queue_stack_maps.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
 create mode 100644 tools/testing/selftests/bpf/test_stack_map.c

--



[PATCH bpf-next 6/6] selftests/bpf: add test cases for queue and stack maps

2018-10-08 Thread Mauricio Vasquez B
test_maps:
Tests that queue/stack maps are behaving correctly even in corner cases

test_progs:
Tests new ebpf helpers

Signed-off-by: Mauricio Vasquez B 
---
 tools/lib/bpf/bpf.c|   12 ++
 tools/lib/bpf/bpf.h|1 
 tools/testing/selftests/bpf/Makefile   |6 +
 tools/testing/selftests/bpf/bpf_helpers.h  |7 +
 tools/testing/selftests/bpf/test_maps.c|  122 
 tools/testing/selftests/bpf/test_progs.c   |   99 
 tools/testing/selftests/bpf/test_queue_map.c   |4 +
 tools/testing/selftests/bpf/test_queue_stack_map.h |   59 ++
 tools/testing/selftests/bpf/test_stack_map.c   |4 +
 9 files changed, 313 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
 create mode 100644 tools/testing/selftests/bpf/test_stack_map.c

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 3878a26a2071..13810c88e1b6 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -278,6 +278,18 @@ int bpf_map_lookup_elem(int fd, const void *key, void 
*value)
return sys_bpf(BPF_MAP_LOOKUP_ELEM, , sizeof(attr));
 }
 
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, const void *value)
+{
+   union bpf_attr attr;
+
+   bzero(, sizeof(attr));
+   attr.map_fd = fd;
+   attr.key = ptr_to_u64(key);
+   attr.value = ptr_to_u64(value);
+
+   return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, , sizeof(attr));
+}
+
 int bpf_map_delete_elem(int fd, const void *key)
 {
union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 6f38164b2618..6134ed9517d3 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -86,6 +86,7 @@ int bpf_map_update_elem(int fd, const void *key, const void 
*value,
__u64 flags);
 
 int bpf_map_lookup_elem(int fd, const void *key, void *value);
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, const void *value);
 int bpf_map_delete_elem(int fd, const void *key);
 int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 int bpf_obj_pin(int fd, const char *pathname);
diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index 1381ab81099c..f78cf72832aa 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -36,7 +36,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o 
test_tcp_estats.o test
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o 
test_lirc_mode2_kern.o \
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
-   test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_sk_lookup_kern.o
+   test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o 
test_sk_lookup_kern.o \
+   test_queue_map.o test_stack_map.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -114,6 +115,9 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
 $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
 $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
 
+$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h
+$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h
+
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 
'usage.*llvm')
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h 
b/tools/testing/selftests/bpf/bpf_helpers.h
index 1d407b3494f9..58dfcb88f9b4 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -16,6 +16,13 @@ static int (*bpf_map_update_elem)(void *map, void *key, void 
*value,
(void *) BPF_FUNC_map_update_elem;
 static int (*bpf_map_delete_elem)(void *map, void *key) =
(void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_map_push_elem)(void *map, void *value,
+   unsigned long long flags) =
+   (void *) BPF_FUNC_map_push_elem;
+static int (*bpf_map_pop_elem)(void *map, void *value) =
+   (void *) BPF_FUNC_map_pop_elem;
+static int (*bpf_map_peek_elem)(void *map, void *value) =
+   (void *) BPF_FUNC_map_peek_elem;
 static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
(void *) BPF_FUNC_probe_read;
 static unsigned long long (*bpf_ktime_get_ns)(void) =
diff --git a/tools/testing/selftests/bpf/test_maps.c 
b/tools/testing/selftests/bpf/test_maps.c
index 9b552c0fc47d..4db2116e52be 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #includ

[PATCH bpf-next 5/6] Sync uapi/bpf.h to tools/include

2018-10-08 Thread Mauricio Vasquez B
Sync both files.

Signed-off-by: Mauricio Vasquez B 
---
 tools/include/uapi/linux/bpf.h |   36 
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f9187b41dff6..bfa042273fad 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+   BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 };
 
 enum bpf_map_type {
@@ -128,6 +129,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+   BPF_MAP_TYPE_QUEUE,
+   BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -462,6 +465,28 @@ union bpf_attr {
  * Return
  * 0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * Description
  * For tracing programs, safely attempt to read *size* bytes from
@@ -789,14 +814,14 @@ union bpf_attr {
  *
  * int ret;
  * struct bpf_tunnel_key key = {};
- * 
+ *
  * ret = bpf_skb_get_tunnel_key(skb, , sizeof(key), 0);
  * if (ret < 0)
  * return TC_ACT_SHOT; // drop packet
- * 
+ *
  * if (key.remote_ipv4 != 0x0a01)
  * return TC_ACT_SHOT; // drop packet
- * 
+ *
  * return TC_ACT_OK;   // accept packet
  *
  * This interface can also be used with all encapsulation devices
@@ -2303,7 +2328,10 @@ union bpf_attr {
FN(skb_ancestor_cgroup_id), \
FN(sk_lookup_tcp),  \
FN(sk_lookup_udp),  \
-   FN(sk_release),
+   FN(sk_release), \
+   FN(map_push_elem),  \
+   FN(map_pop_elem),   \
+   FN(map_peek_elem),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call



[PATCH bpf-next 2/6] bpf/syscall: allow key to be null in map functions

2018-10-08 Thread Mauricio Vasquez B
This commit adds the required logic to allow the key to be NULL
in case the key_size of the map is 0.

A new __bpf_copy_key helper function only copies the key from
userspace when key_size != 0; otherwise it enforces that the key must
be null.

Signed-off-by: Mauricio Vasquez B 
---
 kernel/bpf/syscall.c |   19 +++
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5742df21598c..eb75e8af73ff 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -651,6 +651,17 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void 
*key, void *value)
return -ENOTSUPP;
 }
 
+static void *__bpf_copy_key(void __user *ukey, u64 key_size)
+{
+   if (key_size)
+   return memdup_user(ukey, key_size);
+
+   if (ukey)
+   return ERR_PTR(-EINVAL);
+
+   return NULL;
+}
+
 /* last field in 'union bpf_attr' used by this command */
 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
 
@@ -678,7 +689,7 @@ static int map_lookup_elem(union bpf_attr *attr)
goto err_put;
}
 
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -769,7 +780,7 @@ static int map_update_elem(union bpf_attr *attr)
goto err_put;
}
 
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -871,7 +882,7 @@ static int map_delete_elem(union bpf_attr *attr)
goto err_put;
}
 
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -923,7 +934,7 @@ static int map_get_next_key(union bpf_attr *attr)
}
 
if (ukey) {
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;



[PATCH bpf-next 3/6] bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall

2018-10-08 Thread Mauricio Vasquez B
The following patch implements bpf queue/stack maps that
provide the peek/pop/push functions.  There is not a direct
relationship between those functions and the current map
syscalls, hence a new MAP_LOOKUP_AND_DELETE_ELEM syscall is added;
this is mapped to the pop operation in the queue/stack maps
and is still to be implemented in other kinds of maps.

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf.h  |1 +
 include/uapi/linux/bpf.h |1 +
 kernel/bpf/syscall.c |   81 ++
 3 files changed, 83 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 027697b6a22f..98c7eeb6d138 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -39,6 +39,7 @@ struct bpf_map_ops {
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 
flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
+   void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
 
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f9187b41dff6..3bb94aa2d408 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+   BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 };
 
 enum bpf_map_type {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index eb75e8af73ff..c33d9303f72f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -975,6 +975,84 @@ static int map_get_next_key(union bpf_attr *attr)
return err;
 }
 
+#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
+
+static int map_lookup_and_delete_elem(union bpf_attr *attr)
+{
+   void __user *ukey = u64_to_user_ptr(attr->key);
+   void __user *uvalue = u64_to_user_ptr(attr->value);
+   int ufd = attr->map_fd;
+   struct bpf_map *map;
+   void *key, *value, *ptr;
+   u32 value_size;
+   struct fd f;
+   int err;
+
+   if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
+   return -EINVAL;
+
+   f = fdget(ufd);
+   map = __bpf_map_get(f);
+   if (IS_ERR(map))
+   return PTR_ERR(map);
+
+   if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+   err = -EPERM;
+   goto err_put;
+   }
+
+   key = __bpf_copy_key(ukey, map->key_size);
+   if (IS_ERR(key)) {
+   err = PTR_ERR(key);
+   goto err_put;
+   }
+
+   value_size = map->value_size;
+
+   err = -ENOMEM;
+   value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+   if (!value)
+   goto free_key;
+
+   err = -EFAULT;
+   if (copy_from_user(value, uvalue, value_size) != 0)
+   goto free_value;
+
+   /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
+* inside bpf map update or delete otherwise deadlocks are possible
+*/
+   preempt_disable();
+   __this_cpu_inc(bpf_prog_active);
+   if (!map->ops->map_lookup_and_delete_elem) {
+   err = -ENOTSUPP;
+   goto free_value;
+   }
+   rcu_read_lock();
+   ptr = map->ops->map_lookup_and_delete_elem(map, key);
+   if (ptr)
+   memcpy(value, ptr, value_size);
+   rcu_read_unlock();
+   err = ptr ? 0 : -ENOENT;
+   __this_cpu_dec(bpf_prog_active);
+   preempt_enable();
+
+   if (err)
+   goto free_value;
+
+   if (copy_to_user(uvalue, value, value_size) != 0)
+   goto free_value;
+
+   err = 0;
+
+free_value:
+   kfree(value);
+free_key:
+   kfree(key);
+err_put:
+   fdput(f);
+   return err;
+}
+
 static const struct bpf_prog_ops * const bpf_prog_types[] = {
 #define BPF_PROG_TYPE(_id, _name) \
[_id] = & _name ## _prog_ops,
@@ -2448,6 +2526,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, 
uattr, unsigned int, siz
case BPF_TASK_FD_QUERY:
err = bpf_task_fd_query(, uattr);
break;
+   case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
+   err = map_lookup_and_delete_elem();
+   break;
default:
err = -EINVAL;
break;



[PATCH bpf-next 4/6] bpf: add queue and stack maps

2018-10-08 Thread Mauricio Vasquez B
Queue/stack maps implement a FIFO/LIFO data storage for ebpf programs.
These maps support peek, pop and push operations that are exposed to eBPF
programs through the new bpf_map[peek/pop/push] helpers.  Those operations
are exposed to userspace applications through the already existing
syscalls in the following way:

BPF_MAP_LOOKUP_ELEM-> peek
BPF_MAP_LOOKUP_AND_DELETE_ELEM -> pop
BPF_MAP_UPDATE_ELEM-> push

Queue/stack maps are implemented using a buffer, tail and head indexes,
hence BPF_F_NO_PREALLOC is not supported.

As opposed to other maps, queue and stack do not use RCU for protecting
map values; the bpf_map[peek/pop] helpers have an
ARG_PTR_TO_UNINIT_MAP_VALUE argument that is a pointer to a memory zone
where the value of a map is saved.  Basically the same as
ARG_PTR_TO_UNINIT_MEM, but the size does not need to be passed as an
extra argument.

Our main motivation for implementing queue/stack maps was to keep track
of a pool of elements, like network ports in a SNAT; however, we foresee
other use cases, like for example saving the last N kernel events in a
map and then analysing them from userspace.

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf.h   |7 +
 include/linux/bpf_types.h |2 
 include/uapi/linux/bpf.h  |   35 -
 kernel/bpf/Makefile   |2 
 kernel/bpf/core.c |3 
 kernel/bpf/helpers.c  |   43 ++
 kernel/bpf/queue_stack_maps.c |  288 +
 kernel/bpf/syscall.c  |   30 +++-
 kernel/bpf/verifier.c |   28 +++-
 net/core/filter.c |6 +
 10 files changed, 426 insertions(+), 18 deletions(-)
 create mode 100644 kernel/bpf/queue_stack_maps.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 98c7eeb6d138..cad3bc5cffd1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -40,6 +40,9 @@ struct bpf_map_ops {
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 
flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
+   int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
+   int (*map_pop_elem)(struct bpf_map *map, void *value);
+   int (*map_peek_elem)(struct bpf_map *map, void *value);
 
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -139,6 +142,7 @@ enum bpf_arg_type {
ARG_CONST_MAP_PTR,  /* const argument used as pointer to bpf_map */
ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
ARG_PTR_TO_MAP_VALUE,   /* pointer to stack used as map value */
+   ARG_PTR_TO_UNINIT_MAP_VALUE,/* pointer to valid memory used to 
store a map value */
 
/* the following constraints used to prototype bpf_memcmp() and other
 * functions that access data on eBPF program stack
@@ -825,6 +829,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct 
bpf_map *map,
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
 extern const struct bpf_func_proto bpf_map_delete_elem_proto;
+extern const struct bpf_func_proto bpf_map_push_elem_proto;
+extern const struct bpf_func_proto bpf_map_pop_elem_proto;
+extern const struct bpf_func_proto bpf_map_peek_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 658509daacd4..a2ec73aa1ec7 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
 #endif
 #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3bb94aa2d408..bfa042273fad 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -129,6 +129,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+   BPF_MAP_TYPE_QUEUE,
+   BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -463,6 +465,28 @@ union bpf_attr {
  * Return
  * 0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *

[PATCH bpf-next 0/6] Implement queue/stack maps

2018-10-08 Thread Mauricio Vasquez B
In some applications it is needed to have a pool of free elements, for
example the list of free L4 ports in a SNAT.  None of the current maps allow
doing it, as it is not possible to get any element without having the key
it is associated to; even if it were possible, the lack of locking mechanisms in
eBPF would make it almost impossible to implement without data races.

This patchset implements two new kind of eBPF maps: queue and stack.
Those maps provide to eBPF programs the peek, push and pop operations, and for
userspace applications a new bpf_map_lookup_and_delete_elem() is added.

Signed-off-by: Mauricio Vasquez B 

RFC v4 -> v1:
 - Remove roundup to power of 2 in memory allocation
 - Remove count and use a free slot to check if queue/stack is empty
 - Use if + assignment for wrapping indexes
 - Fix some minor style issues
 - Squash two patches together

RFC v3 -> RFC v4:
 - Revert renaming of kernel/bpf/stackmap.c
 - Remove restriction on value size
 - Remove len arguments from peek/pop helpers
 - Add new ARG_PTR_TO_UNINIT_MAP_VALUE

RFC v2 -> RFC v3:
 - Return elements by value instead that by reference
 - Implement queue/stack base on array and head + tail indexes
 - Rename stack trace related files to avoid confusion and conflicts

RFC v1 -> RFC v2:
 - Create two separate maps instead of single one + flags
 - Implement bpf_map_lookup_and_delete syscall
 - Support peek operation
 - Define replacement policy through flags in the update() method
 - Add eBPF side tests

---

Mauricio Vasquez B (6):
  bpf: rename stack trace map operations
  bpf/syscall: allow key to be null in map functions
  bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall
  bpf: add queue and stack maps
  Sync uapi/bpf.h to tools/include
  selftests/bpf: add test cases for queue and stack maps


 include/linux/bpf.h|8 +
 include/linux/bpf_types.h  |4 
 include/uapi/linux/bpf.h   |   36 ++-
 kernel/bpf/Makefile|2 
 kernel/bpf/core.c  |3 
 kernel/bpf/helpers.c   |   43 +++
 kernel/bpf/queue_stack_maps.c  |  288 
 kernel/bpf/stackmap.c  |2 
 kernel/bpf/syscall.c   |  112 
 kernel/bpf/verifier.c  |   28 ++
 net/core/filter.c  |6 
 tools/include/uapi/linux/bpf.h |   36 ++-
 tools/lib/bpf/bpf.c|   12 +
 tools/lib/bpf/bpf.h|1 
 tools/testing/selftests/bpf/Makefile   |6 
 tools/testing/selftests/bpf/bpf_helpers.h  |7 
 tools/testing/selftests/bpf/test_maps.c|  122 
 tools/testing/selftests/bpf/test_progs.c   |   99 +++
 tools/testing/selftests/bpf/test_queue_map.c   |4 
 tools/testing/selftests/bpf/test_queue_stack_map.h |   59 
 tools/testing/selftests/bpf/test_stack_map.c   |4 
 21 files changed, 862 insertions(+), 20 deletions(-)
 create mode 100644 kernel/bpf/queue_stack_maps.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
 create mode 100644 tools/testing/selftests/bpf/test_stack_map.c

--



[PATCH bpf-next 1/6] bpf: rename stack trace map operations

2018-10-08 Thread Mauricio Vasquez B
In the following patches queue and stack maps (FIFO and LIFO
datastructures) will be implemented.  In order to avoid confusion and
a possible name clash rename stack_map_ops to stack_trace_map_ops

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf_types.h |2 +-
 kernel/bpf/stackmap.c |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 5432f4c9f50e..658509daacd4 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -51,7 +51,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops)
 #ifdef CONFIG_PERF_EVENTS
-BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 8061a439ef18..bb41e293418d 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -600,7 +600,7 @@ static void stack_map_free(struct bpf_map *map)
put_callchain_buffers();
 }
 
-const struct bpf_map_ops stack_map_ops = {
+const struct bpf_map_ops stack_trace_map_ops = {
.map_alloc = stack_map_alloc,
.map_free = stack_map_free,
.map_get_next_key = stack_map_get_next_key,



[RFC PATCH bpf-next v4 6/7] Sync uapi/bpf.h to tools/include

2018-10-04 Thread Mauricio Vasquez B
Sync both files.

Signed-off-by: Mauricio Vasquez B 
---
 tools/include/uapi/linux/bpf.h |   36 
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f9187b41dff6..bfa042273fad 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+   BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 };
 
 enum bpf_map_type {
@@ -128,6 +129,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+   BPF_MAP_TYPE_QUEUE,
+   BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -462,6 +465,28 @@ union bpf_attr {
  * Return
  * 0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * Description
  * For tracing programs, safely attempt to read *size* bytes from
@@ -789,14 +814,14 @@ union bpf_attr {
  *
  * int ret;
  * struct bpf_tunnel_key key = {};
- * 
+ *
  * ret = bpf_skb_get_tunnel_key(skb, , sizeof(key), 0);
  * if (ret < 0)
  * return TC_ACT_SHOT; // drop packet
- * 
+ *
  * if (key.remote_ipv4 != 0x0a01)
  * return TC_ACT_SHOT; // drop packet
- * 
+ *
  * return TC_ACT_OK;   // accept packet
  *
  * This interface can also be used with all encapsulation devices
@@ -2303,7 +2328,10 @@ union bpf_attr {
FN(skb_ancestor_cgroup_id), \
FN(sk_lookup_tcp),  \
FN(sk_lookup_udp),  \
-   FN(sk_release),
+   FN(sk_release), \
+   FN(map_push_elem),  \
+   FN(map_pop_elem),   \
+   FN(map_peek_elem),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call



[RFC PATCH bpf-next v4 5/7] bpf: restrict use of peek/push/pop

2018-10-04 Thread Mauricio Vasquez B
Restrict the use of peek, push and pop helpers only to queue and stack
maps.

Signed-off-by: Mauricio Vasquez B 
---
 kernel/bpf/verifier.c |   14 ++
 1 file changed, 14 insertions(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 489667f93061..8b1f1b348782 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2328,6 +2328,13 @@ static int check_map_func_compatibility(struct 
bpf_verifier_env *env,
if (func_id != BPF_FUNC_sk_select_reuseport)
goto error;
break;
+   case BPF_MAP_TYPE_QUEUE:
+   case BPF_MAP_TYPE_STACK:
+   if (func_id != BPF_FUNC_map_peek_elem &&
+   func_id != BPF_FUNC_map_pop_elem &&
+   func_id != BPF_FUNC_map_push_elem)
+   goto error;
+   break;
default:
break;
}
@@ -2384,6 +2391,13 @@ static int check_map_func_compatibility(struct 
bpf_verifier_env *env,
if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
goto error;
break;
+   case BPF_FUNC_map_peek_elem:
+   case BPF_FUNC_map_pop_elem:
+   case BPF_FUNC_map_push_elem:
+   if (map->map_type != BPF_MAP_TYPE_QUEUE &&
+   map->map_type != BPF_MAP_TYPE_STACK)
+   goto error;
+   break;
default:
break;
}



[RFC PATCH bpf-next v4 2/7] bpf/syscall: allow key to be null in map functions

2018-10-04 Thread Mauricio Vasquez B
This commit adds the required logic to allow the key to be NULL
in case the key_size of the map is 0.

A new __bpf_copy_key helper function only copies the key from
userspace when key_size != 0; otherwise it enforces that key must be
null.

Signed-off-by: Mauricio Vasquez B 
---
 kernel/bpf/syscall.c |   19 +++
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5742df21598c..eb75e8af73ff 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -651,6 +651,17 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void 
*key, void *value)
return -ENOTSUPP;
 }
 
+static void *__bpf_copy_key(void __user *ukey, u64 key_size)
+{
+   if (key_size)
+   return memdup_user(ukey, key_size);
+
+   if (ukey)
+   return ERR_PTR(-EINVAL);
+
+   return NULL;
+}
+
 /* last field in 'union bpf_attr' used by this command */
 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
 
@@ -678,7 +689,7 @@ static int map_lookup_elem(union bpf_attr *attr)
goto err_put;
}
 
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -769,7 +780,7 @@ static int map_update_elem(union bpf_attr *attr)
goto err_put;
}
 
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -871,7 +882,7 @@ static int map_delete_elem(union bpf_attr *attr)
goto err_put;
}
 
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -923,7 +934,7 @@ static int map_get_next_key(union bpf_attr *attr)
}
 
if (ukey) {
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;



[RFC PATCH bpf-next v4 7/7] selftests/bpf: add test cases for queue and stack maps

2018-10-04 Thread Mauricio Vasquez B
Two types of tests are done:
- test_maps: only userspace api.
- test_progs: userspace api and ebpf helpers.

Signed-off-by: Mauricio Vasquez B 
---
 tools/lib/bpf/bpf.c|   12 ++
 tools/lib/bpf/bpf.h|1 
 tools/testing/selftests/bpf/Makefile   |6 +
 tools/testing/selftests/bpf/bpf_helpers.h  |7 +
 tools/testing/selftests/bpf/test_maps.c|  122 
 tools/testing/selftests/bpf/test_progs.c   |   99 
 tools/testing/selftests/bpf/test_queue_map.c   |4 +
 tools/testing/selftests/bpf/test_queue_stack_map.h |   59 ++
 tools/testing/selftests/bpf/test_stack_map.c   |4 +
 9 files changed, 313 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
 create mode 100644 tools/testing/selftests/bpf/test_stack_map.c

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 3878a26a2071..13810c88e1b6 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -278,6 +278,18 @@ int bpf_map_lookup_elem(int fd, const void *key, void 
*value)
return sys_bpf(BPF_MAP_LOOKUP_ELEM, , sizeof(attr));
 }
 
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, const void *value)
+{
+   union bpf_attr attr;
+
+   bzero(, sizeof(attr));
+   attr.map_fd = fd;
+   attr.key = ptr_to_u64(key);
+   attr.value = ptr_to_u64(value);
+
+   return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, , sizeof(attr));
+}
+
 int bpf_map_delete_elem(int fd, const void *key)
 {
union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 6f38164b2618..6134ed9517d3 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -86,6 +86,7 @@ int bpf_map_update_elem(int fd, const void *key, const void 
*value,
__u64 flags);
 
 int bpf_map_lookup_elem(int fd, const void *key, void *value);
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, const void *value);
 int bpf_map_delete_elem(int fd, const void *key);
 int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 int bpf_obj_pin(int fd, const char *pathname);
diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index 1381ab81099c..f78cf72832aa 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -36,7 +36,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o 
test_tcp_estats.o test
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o 
test_lirc_mode2_kern.o \
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
-   test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_sk_lookup_kern.o
+   test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o 
test_sk_lookup_kern.o \
+   test_queue_map.o test_stack_map.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -114,6 +115,9 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
 $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
 $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
 
+$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h
+$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h
+
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 
'usage.*llvm')
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h 
b/tools/testing/selftests/bpf/bpf_helpers.h
index 1d407b3494f9..58dfcb88f9b4 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -16,6 +16,13 @@ static int (*bpf_map_update_elem)(void *map, void *key, void 
*value,
(void *) BPF_FUNC_map_update_elem;
 static int (*bpf_map_delete_elem)(void *map, void *key) =
(void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_map_push_elem)(void *map, void *value,
+   unsigned long long flags) =
+   (void *) BPF_FUNC_map_push_elem;
+static int (*bpf_map_pop_elem)(void *map, void *value) =
+   (void *) BPF_FUNC_map_pop_elem;
+static int (*bpf_map_peek_elem)(void *map, void *value) =
+   (void *) BPF_FUNC_map_peek_elem;
 static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
(void *) BPF_FUNC_probe_read;
 static unsigned long long (*bpf_ktime_get_ns)(void) =
diff --git a/tools/testing/selftests/bpf/test_maps.c 
b/tools/testing/selftests/bpf/test_maps.c
index 9b552c0fc47d..4db2116e52be 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -471

[RFC PATCH bpf-next v4 3/7] bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall

2018-10-04 Thread Mauricio Vasquez B
The following patch implements bpf queue/stack maps that
provide the peek/pop/push functions.  There is not a direct
relationship between those functions and the current map
syscalls, hence a new MAP_LOOKUP_AND_DELETE_ELEM syscall is added;
this is mapped to the pop operation in the queue/stack maps
and it still has to be implemented in other kinds of maps.

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf.h  |1 +
 include/uapi/linux/bpf.h |1 +
 kernel/bpf/syscall.c |   82 ++
 3 files changed, 84 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 027697b6a22f..98c7eeb6d138 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -39,6 +39,7 @@ struct bpf_map_ops {
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 
flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
+   void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
 
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f9187b41dff6..3bb94aa2d408 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+   BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 };
 
 enum bpf_map_type {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index eb75e8af73ff..50957e243bfb 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -975,6 +975,85 @@ static int map_get_next_key(union bpf_attr *attr)
return err;
 }
 
+#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
+
+static int map_lookup_and_delete_elem(union bpf_attr *attr)
+{
+   void __user *ukey = u64_to_user_ptr(attr->key);
+   void __user *uvalue = u64_to_user_ptr(attr->value);
+   int ufd = attr->map_fd;
+   struct bpf_map *map;
+   void *key, *value, *ptr;
+   u32 value_size;
+   struct fd f;
+   int err;
+
+   if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
+   return -EINVAL;
+
+   f = fdget(ufd);
+   map = __bpf_map_get(f);
+   if (IS_ERR(map))
+   return PTR_ERR(map);
+
+   if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+   err = -EPERM;
+   goto err_put;
+   }
+
+   if (!map->ops->map_lookup_and_delete_elem) {
+   err = -ENOTSUPP;
+   goto err_put;
+   }
+
+   key = __bpf_copy_key(ukey, map->key_size);
+   if (IS_ERR(key)) {
+   err = PTR_ERR(key);
+   goto err_put;
+   }
+
+   value_size = map->value_size;
+
+   err = -ENOMEM;
+   value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+   if (!value)
+   goto free_key;
+
+   err = -EFAULT;
+   if (copy_from_user(value, uvalue, value_size) != 0)
+   goto free_value;
+
+   /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
+* inside bpf map update or delete otherwise deadlocks are possible
+*/
+   preempt_disable();
+   __this_cpu_inc(bpf_prog_active);
+   rcu_read_lock();
+   ptr = map->ops->map_lookup_and_delete_elem(map, key);
+   if (ptr)
+   memcpy(value, ptr, value_size);
+   rcu_read_unlock();
+   err = ptr ? 0 : -ENOENT;
+   __this_cpu_dec(bpf_prog_active);
+   preempt_enable();
+
+   if (err)
+   goto free_value;
+
+   if (copy_to_user(uvalue, value, value_size) != 0)
+   goto free_value;
+
+   err = 0;
+
+free_value:
+   kfree(value);
+free_key:
+   kfree(key);
+err_put:
+   fdput(f);
+   return err;
+}
+
 static const struct bpf_prog_ops * const bpf_prog_types[] = {
 #define BPF_PROG_TYPE(_id, _name) \
[_id] = & _name ## _prog_ops,
@@ -2448,6 +2527,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, 
uattr, unsigned int, siz
case BPF_TASK_FD_QUERY:
err = bpf_task_fd_query(, uattr);
break;
+   case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
+   err = map_lookup_and_delete_elem();
+   break;
default:
err = -EINVAL;
break;



[RFC PATCH bpf-next v4 4/7] bpf: add bpf queue and stack maps

2018-10-04 Thread Mauricio Vasquez B
Implement two new kind of maps that support the peek, push and pop
operations.

A use case for this is to keep track of a pool of elements, like
network ports in a SNAT.

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf.h   |7 +
 include/linux/bpf_types.h |2 
 include/uapi/linux/bpf.h  |   35 -
 kernel/bpf/Makefile   |2 
 kernel/bpf/core.c |3 
 kernel/bpf/helpers.c  |   43 ++
 kernel/bpf/queue_stack_maps.c |  300 +
 kernel/bpf/syscall.c  |   31 +++-
 kernel/bpf/verifier.c |   14 +-
 net/core/filter.c |6 +
 10 files changed, 424 insertions(+), 19 deletions(-)
 create mode 100644 kernel/bpf/queue_stack_maps.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 98c7eeb6d138..cad3bc5cffd1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -40,6 +40,9 @@ struct bpf_map_ops {
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 
flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
+   int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
+   int (*map_pop_elem)(struct bpf_map *map, void *value);
+   int (*map_peek_elem)(struct bpf_map *map, void *value);
 
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -139,6 +142,7 @@ enum bpf_arg_type {
ARG_CONST_MAP_PTR,  /* const argument used as pointer to bpf_map */
ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
ARG_PTR_TO_MAP_VALUE,   /* pointer to stack used as map value */
+   ARG_PTR_TO_UNINIT_MAP_VALUE,/* pointer to valid memory used to 
store a map value */
 
/* the following constraints used to prototype bpf_memcmp() and other
 * functions that access data on eBPF program stack
@@ -825,6 +829,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct 
bpf_map *map,
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
 extern const struct bpf_func_proto bpf_map_delete_elem_proto;
+extern const struct bpf_func_proto bpf_map_push_elem_proto;
+extern const struct bpf_func_proto bpf_map_pop_elem_proto;
+extern const struct bpf_func_proto bpf_map_peek_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 658509daacd4..a2ec73aa1ec7 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
 #endif
 #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3bb94aa2d408..bfa042273fad 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -129,6 +129,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+   BPF_MAP_TYPE_QUEUE,
+   BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -463,6 +465,28 @@ union bpf_attr {
  * Return
  * 0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * Description
  * For tracing programs, safely attempt to read *size* bytes from
@@ -790,14 +814,14 @@ union bpf_attr {
  *
  * int ret;
  * struct bpf_tunnel_key key = {};
- * 
+ *
  * ret = bpf_skb_get_tunnel_key(skb, , sizeof(key), 0);
  * if (ret < 0)
  * return TC_ACT_SHOT; // drop pac

[RFC PATCH bpf-next v4 1/7] bpf: rename stack trace map operations

2018-10-04 Thread Mauricio Vasquez B
In the following patches queue and stack maps (FIFO and LIFO
datastructures) will be implemented.  In order to avoid confusion and
a possible name clash rename stack_map_ops to stack_trace_map_ops

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf_types.h |2 +-
 kernel/bpf/stackmap.c |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 5432f4c9f50e..658509daacd4 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -51,7 +51,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops)
 #ifdef CONFIG_PERF_EVENTS
-BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 8061a439ef18..bb41e293418d 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -600,7 +600,7 @@ static void stack_map_free(struct bpf_map *map)
put_callchain_buffers();
 }
 
-const struct bpf_map_ops stack_map_ops = {
+const struct bpf_map_ops stack_trace_map_ops = {
.map_alloc = stack_map_alloc,
.map_free = stack_map_free,
.map_get_next_key = stack_map_get_next_key,



[RFC PATCH bpf-next v4 0/7] Implement bpf queue/stack maps

2018-10-04 Thread Mauricio Vasquez B
In some applications it is needed to have a pool of free elements, for
example the list of free L4 ports in a SNAT.  None of the current maps allow
doing it, as it is not possible to get any element without having the key
it is associated to.

This patchset implements two new kind of eBPF maps: queue and stack.
Those maps provide to eBPF programs the peek, push and pop operations, and for
userspace applications a new bpf_map_lookup_and_delete_elem() is added.

Signed-off-by: Mauricio Vasquez B 

v3 -> v4:
 - Revert renaming of kernel/bpf/stackmap.c
 - Remove restriction on value size
 - Remove len arguments from peek/pop helpers
 - Add new ARG_PTR_TO_UNINIT_MAP_VALUE

v2 -> v3:
 - Return elements by value instead that by reference
 - Implement queue/stack base on array and head + tail indexes
 - Rename stack trace related files to avoid confusion and conflicts

v1 -> v2:
 - Create two separate maps instead of single one + flags
 - Implement bpf_map_lookup_and_delete syscall
 - Support peek operation
 - Define replacement policy through flags in the update() method
 - Add eBPF side tests

---

Mauricio Vasquez B (7):
  bpf: rename stack trace map operations
  bpf/syscall: allow key to be null in map functions
  bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall
  bpf: add bpf queue and stack maps
  bpf: restrict use of peek/push/pop
  Sync uapi/bpf.h to tools/include
  selftests/bpf: add test cases for queue and stack maps


 include/linux/bpf.h|8 +
 include/linux/bpf_types.h  |4 
 include/uapi/linux/bpf.h   |   36 ++
 kernel/bpf/Makefile|2 
 kernel/bpf/core.c  |3 
 kernel/bpf/helpers.c   |   43 +++
 kernel/bpf/queue_stack_maps.c  |  300 
 kernel/bpf/stackmap.c  |2 
 kernel/bpf/syscall.c   |  112 +++
 kernel/bpf/verifier.c  |   28 ++
 net/core/filter.c  |6 
 tools/include/uapi/linux/bpf.h |   36 ++
 tools/lib/bpf/bpf.c|   12 +
 tools/lib/bpf/bpf.h|1 
 tools/testing/selftests/bpf/Makefile   |6 
 tools/testing/selftests/bpf/bpf_helpers.h  |7 
 tools/testing/selftests/bpf/test_maps.c|  122 
 tools/testing/selftests/bpf/test_progs.c   |   99 +++
 tools/testing/selftests/bpf/test_queue_map.c   |4 
 tools/testing/selftests/bpf/test_queue_stack_map.h |   59 
 tools/testing/selftests/bpf/test_stack_map.c   |4 
 21 files changed, 874 insertions(+), 20 deletions(-)
 create mode 100644 kernel/bpf/queue_stack_maps.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
 create mode 100644 tools/testing/selftests/bpf/test_stack_map.c

--



[RFC PATCH bpf-next v3 7/7] selftests/bpf: add test cases for queue and stack maps

2018-09-17 Thread Mauricio Vasquez B
Two types of tests are done:
- test_maps: only userspace api.
- test_progs: userspace api and ebpf helpers.

Signed-off-by: Mauricio Vasquez B 
---
 kernel/bpf/helpers.c   |2 
 tools/lib/bpf/bpf.c|   12 ++
 tools/lib/bpf/bpf.h|1 
 tools/testing/selftests/bpf/Makefile   |5 +
 tools/testing/selftests/bpf/bpf_helpers.h  |7 +
 tools/testing/selftests/bpf/test_maps.c|  130 
 tools/testing/selftests/bpf/test_progs.c   |   99 +++
 tools/testing/selftests/bpf/test_queue_map.c   |4 +
 tools/testing/selftests/bpf/test_queue_stack_map.h |   59 +
 tools/testing/selftests/bpf/test_stack_map.c   |4 +
 10 files changed, 321 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
 create mode 100644 tools/testing/selftests/bpf/test_stack_map.c

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 5f364e6acaf1..1293cd5240e3 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -76,7 +76,7 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = {
.arg2_type  = ARG_PTR_TO_MAP_KEY,
 };
 
-BPF_CALL_4(bpf_map_push_elem, struct bpf_map *, map, void *, value, u32 size,
+BPF_CALL_4(bpf_map_push_elem, struct bpf_map *, map, void *, value, u32, size,
   u64, flags)
 {
if (map->value_size != size)
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 60aa4ca8b2c5..7056b2eb554d 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -286,6 +286,18 @@ int bpf_map_lookup_elem(int fd, const void *key, void 
*value)
return sys_bpf(BPF_MAP_LOOKUP_ELEM, , sizeof(attr));
 }
 
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, const void *value)
+{
+   union bpf_attr attr;
+
+   bzero(, sizeof(attr));
+   attr.map_fd = fd;
+   attr.key = ptr_to_u64(key);
+   attr.value = ptr_to_u64(value);
+
+   return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, , sizeof(attr));
+}
+
 int bpf_map_delete_elem(int fd, const void *key)
 {
union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 6f38164b2618..6134ed9517d3 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -86,6 +86,7 @@ int bpf_map_update_elem(int fd, const void *key, const void 
*value,
__u64 flags);
 
 int bpf_map_lookup_elem(int fd, const void *key, void *value);
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, const void *value);
 int bpf_map_delete_elem(int fd, const void *key);
 int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 int bpf_obj_pin(int fd, const char *pathname);
diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index fff7fb1285fc..ad8a2b8fb738 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -35,7 +35,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o 
test_tcp_estats.o test
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o 
test_lirc_mode2_kern.o \
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
-   test_skb_cgroup_id_kern.o
+   test_skb_cgroup_id_kern.o test_queue_map.o test_stack_map.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -110,6 +110,9 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
 $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
 $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
 
+$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h
+$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h
+
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 
'usage.*llvm')
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h 
b/tools/testing/selftests/bpf/bpf_helpers.h
index e4be7730222d..bdbe8f84023e 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -16,6 +16,13 @@ static int (*bpf_map_update_elem)(void *map, void *key, void 
*value,
(void *) BPF_FUNC_map_update_elem;
 static int (*bpf_map_delete_elem)(void *map, void *key) =
(void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_map_push_elem)(void *map, const void *value, int len,
+   unsigned long long flags) =
+   (void *) BPF_FUNC_map_push_elem;
+static int (*bpf_map_pop_elem)(void *map, void *value, int len) =
+   (void *) BPF_FUNC_map_pop_elem;
+static int (*bpf_map_peek_elem)(void *map, void *value, int len) =
+   (void *) BPF_FUNC_ma

[RFC PATCH bpf-next v3 5/7] bpf: restrict use of peek/push/pop

2018-09-17 Thread Mauricio Vasquez B
Restrict the use of peek, push and pop helpers only to queue and stack
maps.

Signed-off-by: Mauricio Vasquez B 
---
 kernel/bpf/verifier.c |   14 ++
 1 file changed, 14 insertions(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b9e005188f0e..1628ffe48e32 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2084,6 +2084,13 @@ static int check_map_func_compatibility(struct 
bpf_verifier_env *env,
if (func_id != BPF_FUNC_sk_select_reuseport)
goto error;
break;
+   case BPF_MAP_TYPE_QUEUE:
+   case BPF_MAP_TYPE_STACK:
+   if (func_id != BPF_FUNC_map_peek_elem &&
+   func_id != BPF_FUNC_map_pop_elem &&
+   func_id != BPF_FUNC_map_push_elem)
+   goto error;
+   break;
default:
break;
}
@@ -2139,6 +2146,13 @@ static int check_map_func_compatibility(struct 
bpf_verifier_env *env,
if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
goto error;
break;
+   case BPF_FUNC_map_peek_elem:
+   case BPF_FUNC_map_pop_elem:
+   case BPF_FUNC_map_push_elem:
+   if (map->map_type != BPF_MAP_TYPE_QUEUE &&
+   map->map_type != BPF_MAP_TYPE_STACK)
+   goto error;
+   break;
default:
break;
}



[RFC PATCH bpf-next v3 4/7] bpf: add bpf queue and stack maps

2018-09-17 Thread Mauricio Vasquez B
Implement two new kinds of maps that support the peek, push and pop
operations.

A use case for this is to keep track of a pool of elements, like
network ports in a SNAT.

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf.h   |3 
 include/linux/bpf_types.h |2 
 include/uapi/linux/bpf.h  |   30 
 kernel/bpf/Makefile   |2 
 kernel/bpf/core.c |3 
 kernel/bpf/helpers.c  |   98 ++
 kernel/bpf/queue_stack_maps.c |  291 +
 kernel/bpf/verifier.c |5 +
 net/core/filter.c |6 +
 9 files changed, 437 insertions(+), 3 deletions(-)
 create mode 100644 kernel/bpf/queue_stack_maps.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c63a44381d3f..8e924b5c5a0e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -807,6 +807,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct 
bpf_map *map,
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
 extern const struct bpf_func_proto bpf_map_delete_elem_proto;
+extern const struct bpf_func_proto bpf_map_push_elem_proto;
+extern const struct bpf_func_proto bpf_map_pop_elem_proto;
+extern const struct bpf_func_proto bpf_map_peek_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 33f7f574b983..903a446f14c3 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -67,3 +67,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
 #endif
 #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4cda584c6640..c899386dcb2b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -128,6 +128,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_SOCKHASH,
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+   BPF_MAP_TYPE_QUEUE,
+   BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -460,6 +462,29 @@ union bpf_attr {
  * Return
  * 0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u32 len,
+ *  u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value, u32 len)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value, u32 len)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * Description
  * For tracing programs, safely attempt to read *size* bytes from
@@ -2227,7 +2252,10 @@ union bpf_attr {
FN(get_current_cgroup_id),  \
FN(get_local_storage),  \
FN(sk_select_reuseport),\
-   FN(skb_ancestor_cgroup_id),
+   FN(skb_ancestor_cgroup_id), \
+   FN(map_push_elem),  \
+   FN(map_pop_elem),   \
+   FN(map_peek_elem),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e656bce87c8f..2d77bc5b2aca 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -3,7 +3,7 @@ obj-y := core.o
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o 
bpf_lru_list.o lpm_trie.o map_in_map.o
-obj-$(CONFIG_BPF_SYSCALL) += local_storage.o
+obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
 ifeq ($(CONFIG_NET),y)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 3f5bf1af0826..8d2db076d123 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1783,6 +1783,9 @@ BPF_CALL_0(bpf_user_rnd_u32)
 const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
 const struct bpf_func_proto bpf_map_update_elem_proto __weak;
 const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
+const struct bpf_func_proto bpf_map_push_elem_proto __weak;
+const struct bpf_func_proto

[RFC PATCH bpf-next v3 6/7] Sync uapi/bpf.h to tools/include

2018-09-17 Thread Mauricio Vasquez B
Sync both files.

Signed-off-by: Mauricio Vasquez B 
---
 tools/include/uapi/linux/bpf.h |   31 ++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 66917a4eba27..c899386dcb2b 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+   BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 };
 
 enum bpf_map_type {
@@ -127,6 +128,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_SOCKHASH,
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+   BPF_MAP_TYPE_QUEUE,
+   BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -459,6 +462,29 @@ union bpf_attr {
  * Return
  * 0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u32 len,
+ *  u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value, u32 len)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value, u32 len)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * Description
  * For tracing programs, safely attempt to read *size* bytes from
@@ -2226,7 +2252,10 @@ union bpf_attr {
FN(get_current_cgroup_id),  \
FN(get_local_storage),  \
FN(sk_select_reuseport),\
-   FN(skb_ancestor_cgroup_id),
+   FN(skb_ancestor_cgroup_id), \
+   FN(map_push_elem),  \
+   FN(map_pop_elem),   \
+   FN(map_peek_elem),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call



[RFC PATCH bpf-next v3 0/7] Implement bpf queue/stack maps

2018-09-17 Thread Mauricio Vasquez B
In some applications it is necessary to have a pool of free elements, like for
example the list of free L4 ports in a SNAT.  None of the current maps allow
doing this, as it is not possible to get an arbitrary element without having
the key it is associated to.

This patchset implements two new kind of eBPF maps: queue and stack.
Those maps provide to eBPF programs the peek, push and pop operations, and for
userspace applications a new bpf_map_lookup_and_delete_elem() is added.

Signed-off-by: Mauricio Vasquez B 

v2 -> v3:
 - Return elements by value instead that by reference
 - Implement queue/stack base on array and head + tail indexes
 - Rename stack trace related files to avoid confusion and conflicts

v1 -> v2:
 - Create two separate maps instead of single one + flags
 - Implement bpf_map_lookup_and_delete syscall
 - Support peek operation
 - Define replacement policy through flags in the update() method
 - Add eBPF side tests

---

Mauricio Vasquez B (7):
  bpf: rename stack trace map
  bpf/syscall: allow key to be null in map functions
  bpf: add lookup_and_delete map operation
  bpf: add bpf queue and stack maps
  bpf: restrict use of peek/push/pop
  Sync uapi/bpf.h to tools/include
  selftests/bpf: add test cases for queue and stack maps


 include/linux/bpf.h|4 
 include/linux/bpf_types.h  |4 
 include/uapi/linux/bpf.h   |   31 +
 kernel/bpf/Makefile|4 
 kernel/bpf/core.c  |3 
 kernel/bpf/helpers.c   |   98 +++
 kernel/bpf/queue_stack_maps.c  |  291 +
 kernel/bpf/stackmap.c  |  624 
 kernel/bpf/stacktracemap.c |  624 
 kernel/bpf/syscall.c   |  101 +++
 kernel/bpf/verifier.c  |   19 +
 net/core/filter.c  |6 
 tools/include/uapi/linux/bpf.h |   31 +
 tools/lib/bpf/bpf.c|   12 
 tools/lib/bpf/bpf.h|1 
 tools/testing/selftests/bpf/Makefile   |5 
 tools/testing/selftests/bpf/bpf_helpers.h  |7 
 tools/testing/selftests/bpf/test_maps.c|  130 
 tools/testing/selftests/bpf/test_progs.c   |   99 +++
 tools/testing/selftests/bpf/test_queue_map.c   |4 
 tools/testing/selftests/bpf/test_queue_stack_map.h |   59 ++
 tools/testing/selftests/bpf/test_stack_map.c   |4 
 22 files changed, 1526 insertions(+), 635 deletions(-)
 create mode 100644 kernel/bpf/queue_stack_maps.c
 delete mode 100644 kernel/bpf/stackmap.c
 create mode 100644 kernel/bpf/stacktracemap.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
 create mode 100644 tools/testing/selftests/bpf/test_stack_map.c

--



[RFC PATCH bpf-next v3 1/7] bpf: rename stack trace map

2018-09-17 Thread Mauricio Vasquez B
In the following patches queue and stack maps (FIFO and LIFO
data structures) will be implemented.  In order to avoid confusion and
a possible name clash, rename stackmap.c to stacktracemap.c and
stack_map_ops to stack_trace_map_ops.

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf_types.h  |2 
 kernel/bpf/Makefile|2 
 kernel/bpf/stackmap.c  |  624 
 kernel/bpf/stacktracemap.c |  624 
 4 files changed, 626 insertions(+), 626 deletions(-)
 delete mode 100644 kernel/bpf/stackmap.c
 create mode 100644 kernel/bpf/stacktracemap.c

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index cd26c090e7c0..33f7f574b983 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -49,7 +49,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops)
 #ifdef CONFIG_PERF_EVENTS
-BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 0488b8258321..e656bce87c8f 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -20,7 +20,7 @@ endif
 endif
 endif
 ifeq ($(CONFIG_PERF_EVENTS),y)
-obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
+obj-$(CONFIG_BPF_SYSCALL) += stacktracemap.o
 endif
 obj-$(CONFIG_CGROUP_BPF) += cgroup.o
 ifeq ($(CONFIG_INET),y)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
deleted file mode 100644
index 8061a439ef18..
--- a/kernel/bpf/stackmap.c
+++ /dev/null
@@ -1,624 +0,0 @@
-/* Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include "percpu_freelist.h"
-
-#define STACK_CREATE_FLAG_MASK \
-   (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY |\
-BPF_F_STACK_BUILD_ID)
-
-struct stack_map_bucket {
-   struct pcpu_freelist_node fnode;
-   u32 hash;
-   u32 nr;
-   u64 data[];
-};
-
-struct bpf_stack_map {
-   struct bpf_map map;
-   void *elems;
-   struct pcpu_freelist freelist;
-   u32 n_buckets;
-   struct stack_map_bucket *buckets[];
-};
-
-/* irq_work to run up_read() for build_id lookup in nmi context */
-struct stack_map_irq_work {
-   struct irq_work irq_work;
-   struct rw_semaphore *sem;
-};
-
-static void do_up_read(struct irq_work *entry)
-{
-   struct stack_map_irq_work *work;
-
-   work = container_of(entry, struct stack_map_irq_work, irq_work);
-   up_read(work->sem);
-   work->sem = NULL;
-}
-
-static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
-
-static inline bool stack_map_use_build_id(struct bpf_map *map)
-{
-   return (map->map_flags & BPF_F_STACK_BUILD_ID);
-}
-
-static inline int stack_map_data_size(struct bpf_map *map)
-{
-   return stack_map_use_build_id(map) ?
-   sizeof(struct bpf_stack_build_id) : sizeof(u64);
-}
-
-static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
-{
-   u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
-   int err;
-
-   smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries,
-smap->map.numa_node);
-   if (!smap->elems)
-   return -ENOMEM;
-
-   err = pcpu_freelist_init(>freelist);
-   if (err)
-   goto free_elems;
-
-   pcpu_freelist_populate(>freelist, smap->elems, elem_size,
-  smap->map.max_entries);
-   return 0;
-
-free_elems:
-   bpf_map_area_free(smap->elems);
-   return err;
-}
-
-/* Called from syscall */
-static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
-{
-   u32 value_size = attr->value_size;
-   struct bpf_stack_map *smap;
-   u64 cost, n_buckets;
-   int err;
-
-   if (!capable(CAP_SYS_ADMIN))
-   return ERR_PTR(-EPERM);
-
-   if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)
-   return ERR_PTR(-EINVAL);
-
-   /* check sanity of attributes */
-   if (attr->max_entries == 0 || attr->key_size != 4 ||
-   value_size < 8 || value_size % 8)
-   return ERR_PTR(-EINVAL);
-
-   BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64));
-   if (attr->map_flags & BPF_F_STACK_BUILD_ID) {
-   if (value_size % sizeof(struct bpf_stack

[RFC PATCH bpf-next v3 2/7] bpf/syscall: allow key to be null in map functions

2018-09-17 Thread Mauricio Vasquez B
This commit adds the required logic to allow the key to be NULL
in case the key_size of the map is 0.

A new helper function, __bpf_copy_key, copies the key from
userspace only when key_size != 0; otherwise it enforces that the key
must be null.

Signed-off-by: Mauricio Vasquez B 
---
 kernel/bpf/syscall.c |   19 +++
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 3c9636f03bb2..f2d4e4f280dc 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -651,6 +651,17 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void 
*key, void *value)
return -ENOTSUPP;
 }
 
+static void *__bpf_copy_key(void __user *ukey, u64 key_size)
+{
+   if (key_size)
+   return memdup_user(ukey, key_size);
+
+   if (ukey)
+   return ERR_PTR(-EINVAL);
+
+   return NULL;
+}
+
 /* last field in 'union bpf_attr' used by this command */
 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
 
@@ -678,7 +689,7 @@ static int map_lookup_elem(union bpf_attr *attr)
goto err_put;
}
 
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -766,7 +777,7 @@ static int map_update_elem(union bpf_attr *attr)
goto err_put;
}
 
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -864,7 +875,7 @@ static int map_delete_elem(union bpf_attr *attr)
goto err_put;
}
 
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -916,7 +927,7 @@ static int map_get_next_key(union bpf_attr *attr)
}
 
if (ukey) {
-   key = memdup_user(ukey, map->key_size);
+   key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;



[RFC PATCH bpf-next v3 3/7] bpf: add lookup_and_delete map operation

2018-09-17 Thread Mauricio Vasquez B
The following patches implement bpf queue/stack maps that
provide the peek/pop/push functions.  There is no direct
relationship between those functions and the current operations
supported by a map; hence a new lookup_and_delete map operation
is added, which will be used by the pop helper.

A dedicated pop operation is not added because it would be too specific
to stack/queue maps; instead, this new operation could be useful
for other maps as well.

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf.h  |1 +
 include/uapi/linux/bpf.h |1 +
 kernel/bpf/syscall.c |   82 ++
 3 files changed, 84 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 523481a3471b..c63a44381d3f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -39,6 +39,7 @@ struct bpf_map_ops {
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 
flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
+   void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
 
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 66917a4eba27..4cda584c6640 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+   BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 };
 
 enum bpf_map_type {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f2d4e4f280dc..7d429123a298 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -968,6 +968,85 @@ static int map_get_next_key(union bpf_attr *attr)
return err;
 }
 
+#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
+
+static int map_lookup_and_delete_elem(union bpf_attr *attr)
+{
+   void __user *ukey = u64_to_user_ptr(attr->key);
+   void __user *uvalue = u64_to_user_ptr(attr->value);
+   int ufd = attr->map_fd;
+   struct bpf_map *map;
+   void *key, *value, *ptr;
+   u32 value_size;
+   struct fd f;
+   int err;
+
+   if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
+   return -EINVAL;
+
+   f = fdget(ufd);
+   map = __bpf_map_get(f);
+   if (IS_ERR(map))
+   return PTR_ERR(map);
+
+   if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+   err = -EPERM;
+   goto err_put;
+   }
+
+   if (!map->ops->map_lookup_and_delete_elem) {
+   err = -ENOTSUPP;
+   goto err_put;
+   }
+
+   key = __bpf_copy_key(ukey, map->key_size);
+   if (IS_ERR(key)) {
+   err = PTR_ERR(key);
+   goto err_put;
+   }
+
+   value_size = map->value_size;
+
+   err = -ENOMEM;
+   value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+   if (!value)
+   goto free_key;
+
+   err = -EFAULT;
+   if (copy_from_user(value, uvalue, value_size) != 0)
+   goto free_value;
+
+   /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
+* inside bpf map update or delete otherwise deadlocks are possible
+*/
+   preempt_disable();
+   __this_cpu_inc(bpf_prog_active);
+   rcu_read_lock();
+   ptr = map->ops->map_lookup_and_delete_elem(map, key);
+   if (ptr)
+   memcpy(value, ptr, value_size);
+   rcu_read_unlock();
+   err = ptr ? 0 : -ENOENT;
+   __this_cpu_dec(bpf_prog_active);
+   preempt_enable();
+
+   if (err)
+   goto free_value;
+
+   if (copy_to_user(uvalue, value, value_size) != 0)
+   goto free_value;
+
+   err = 0;
+
+free_value:
+   kfree(value);
+free_key:
+   kfree(key);
+err_put:
+   fdput(f);
+   return err;
+}
+
 static const struct bpf_prog_ops * const bpf_prog_types[] = {
 #define BPF_PROG_TYPE(_id, _name) \
[_id] = & _name ## _prog_ops,
@@ -2428,6 +2507,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, 
uattr, unsigned int, siz
case BPF_TASK_FD_QUERY:
err = bpf_task_fd_query(, uattr);
break;
+   case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
+   err = map_lookup_and_delete_elem();
+   break;
default:
err = -EINVAL;
break;



[RFC PATCH bpf-next v2 3/4] Sync uapi/bpf.h to tools/include

2018-08-31 Thread Mauricio Vasquez B
Sync both files.

Signed-off-by: Mauricio Vasquez B 
---
 tools/include/uapi/linux/bpf.h |   36 
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 66917a4eba27..0a5b904ba42f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+   BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 };
 
 enum bpf_map_type {
@@ -127,6 +128,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_SOCKHASH,
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+   BPF_MAP_TYPE_QUEUE,
+   BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -459,6 +462,28 @@ union bpf_attr {
  * Return
  * 0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * void *bpf_map_pop_elem(struct bpf_map *map)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * Pointer to the element of *NULL* if there is not any.
+ *
+ * void *bpf_map_peek_elem(struct bpf_map *map)
+ * Description
+ * Return an element from *map* without removing it.
+ * Return
+ * Pointer to the element of *NULL* if there is not any.
+ *
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * Description
  * For tracing programs, safely attempt to read *size* bytes from
@@ -786,14 +811,14 @@ union bpf_attr {
  *
  * int ret;
  * struct bpf_tunnel_key key = {};
- * 
+ *
  * ret = bpf_skb_get_tunnel_key(skb, , sizeof(key), 0);
  * if (ret < 0)
  * return TC_ACT_SHOT; // drop packet
- * 
+ *
  * if (key.remote_ipv4 != 0x0a01)
  * return TC_ACT_SHOT; // drop packet
- * 
+ *
  * return TC_ACT_OK;   // accept packet
  *
  * This interface can also be used with all encapsulation devices
@@ -2226,7 +2251,10 @@ union bpf_attr {
FN(get_current_cgroup_id),  \
FN(get_local_storage),  \
FN(sk_select_reuseport),\
-   FN(skb_ancestor_cgroup_id),
+   FN(skb_ancestor_cgroup_id), \
+   FN(map_push_elem),  \
+   FN(map_pop_elem),   \
+   FN(map_peek_elem),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call



[RFC PATCH bpf-next v2 4/4] selftests/bpf: add test cases for queue and stack maps

2018-08-31 Thread Mauricio Vasquez B
Two types of tests are done:
- test_maps: only userspace api.
- test_progs: userspace api and ebpf helpers.

Signed-off-by: Mauricio Vasquez B 
---
 tools/lib/bpf/bpf.c|   12 ++
 tools/lib/bpf/bpf.h|1 
 tools/testing/selftests/bpf/Makefile   |2 
 tools/testing/selftests/bpf/bpf_helpers.h  |7 +
 tools/testing/selftests/bpf/test_maps.c|  101 
 tools/testing/selftests/bpf/test_progs.c   |   99 
 tools/testing/selftests/bpf/test_queue_map.c   |4 +
 tools/testing/selftests/bpf/test_queue_stack_map.h |   59 
 tools/testing/selftests/bpf/test_stack_map.c   |4 +
 9 files changed, 288 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
 create mode 100644 tools/testing/selftests/bpf/test_stack_map.c

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 60aa4ca8b2c5..7056b2eb554d 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -286,6 +286,18 @@ int bpf_map_lookup_elem(int fd, const void *key, void 
*value)
return sys_bpf(BPF_MAP_LOOKUP_ELEM, , sizeof(attr));
 }
 
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, const void *value)
+{
+   union bpf_attr attr;
+
+   bzero(, sizeof(attr));
+   attr.map_fd = fd;
+   attr.key = ptr_to_u64(key);
+   attr.value = ptr_to_u64(value);
+
+   return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, , sizeof(attr));
+}
+
 int bpf_map_delete_elem(int fd, const void *key)
 {
union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 6f38164b2618..6134ed9517d3 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -86,6 +86,7 @@ int bpf_map_update_elem(int fd, const void *key, const void 
*value,
__u64 flags);
 
 int bpf_map_lookup_elem(int fd, const void *key, void *value);
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, const void *value);
 int bpf_map_delete_elem(int fd, const void *key);
 int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 int bpf_obj_pin(int fd, const char *pathname);
diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index fff7fb1285fc..3c773a66aa5f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -35,7 +35,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o 
test_tcp_estats.o test
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o 
test_lirc_mode2_kern.o \
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
-   test_skb_cgroup_id_kern.o
+   test_skb_cgroup_id_kern.o test_queue_map.o test_stack_map.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h 
b/tools/testing/selftests/bpf/bpf_helpers.h
index e4be7730222d..05fb5ed90b89 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -16,6 +16,13 @@ static int (*bpf_map_update_elem)(void *map, void *key, void 
*value,
(void *) BPF_FUNC_map_update_elem;
 static int (*bpf_map_delete_elem)(void *map, void *key) =
(void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_map_push_elem)(void *map, void *value,
+   unsigned long long flags) =
+   (void *) BPF_FUNC_map_push_elem;
+static void *(*bpf_map_pop_elem)(void *map) =
+   (void *) BPF_FUNC_map_pop_elem;
+static void *(*bpf_map_peek_elem)(void *map) =
+   (void *) BPF_FUNC_map_peek_elem;
 static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
(void *) BPF_FUNC_probe_read;
 static unsigned long long (*bpf_ktime_get_ns)(void) =
diff --git a/tools/testing/selftests/bpf/test_maps.c 
b/tools/testing/selftests/bpf/test_maps.c
index 6f54f84144a0..754871c7c8b4 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -471,6 +472,102 @@ static void test_devmap(int task, void *data)
close(fd);
 }
 
+static void test_queuemap(int task, void *data)
+{
+   const int MAP_SIZE = 32;
+   __u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+   int fd, i;
+
+   /* Fill test values to be used */
+   for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++)
+   vals[i] = rand();
+
+   fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE,
+   map_flags);
+   if (fd < 0) {
+   printf("Failed to create queuemap '%s'!\n", strerror(errno));
+   exit(1);
+   }
+
+   /* Push MAP_SIZE elements */
+

[RFC PATCH bpf-next v2 1/4] bpf: add bpf queue and stack maps

2018-08-31 Thread Mauricio Vasquez B
Implement two new kinds of maps that support the peek, push and pop
operations.

A use case for this is to keep track of a pool of elements, like
network ports in a SNAT.

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf.h   |8 +
 include/linux/bpf_types.h |2 
 include/uapi/linux/bpf.h  |   36 
 kernel/bpf/Makefile   |2 
 kernel/bpf/helpers.c  |   44 +
 kernel/bpf/queue_stack_maps.c |  353 +
 kernel/bpf/syscall.c  |   96 +++
 kernel/bpf/verifier.c |6 +
 net/core/filter.c |6 +
 9 files changed, 543 insertions(+), 10 deletions(-)
 create mode 100644 kernel/bpf/queue_stack_maps.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 523481a3471b..1d39b9096d9f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -39,6 +39,11 @@ struct bpf_map_ops {
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 
flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
+   void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
+
+   /* funcs callable from eBPF programs */
+   void *(*map_lookup_or_init_elem)(struct bpf_map *map, void *key,
+void *value);
 
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -806,6 +811,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct 
bpf_map *map,
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
 extern const struct bpf_func_proto bpf_map_delete_elem_proto;
+extern const struct bpf_func_proto bpf_map_push_elem_proto;
+extern const struct bpf_func_proto bpf_map_pop_elem_proto;
+extern const struct bpf_func_proto bpf_map_peek_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index cd26c090e7c0..8d955f11f1cd 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -67,3 +67,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
 #endif
 #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, queue_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 66917a4eba27..0a5b904ba42f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+   BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 };
 
 enum bpf_map_type {
@@ -127,6 +128,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_SOCKHASH,
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+   BPF_MAP_TYPE_QUEUE,
+   BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -459,6 +462,28 @@ union bpf_attr {
  * Return
  * 0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * void *bpf_map_pop_elem(struct bpf_map *map)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * Pointer to the element, or *NULL* if there is not any.
+ *
+ * void *bpf_map_peek_elem(struct bpf_map *map)
+ * Description
+ * Return an element from *map* without removing it.
+ * Return
+ * Pointer to the element, or *NULL* if there is not any.
+ *
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * Description
  * For tracing programs, safely attempt to read *size* bytes from
@@ -786,14 +811,14 @@ union bpf_attr {
  *
  * int ret;
  * struct bpf_tunnel_key key = {};
- * 
+ *
  * ret = bpf_skb_get_tunnel_key(skb, , sizeof(key), 0);
  * if (ret < 0)
  * return TC_ACT_SHOT; // drop packet
- * 
+ *
  * if (key.remote_ipv4 != 0x0a01)
  * return TC_ACT_SHOT; // drop packet
- * 
+ *
  * return TC_ACT_OK;   // accept packet
  *
  * This interface can also be used with all encapsulation devices
@@ -2226,7 +2251,10 @@ union bpf_attr {
   

[RFC PATCH bpf-next v2 2/4] bpf: restrict use of peek/push/pop

2018-08-31 Thread Mauricio Vasquez B
Restrict the use of peek, push and pop helpers only to queue and stack
maps.

Signed-off-by: Mauricio Vasquez B 
---
 kernel/bpf/verifier.c |   14 ++
 1 file changed, 14 insertions(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5bd67feb2f07..9e177ff4a3b9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2172,6 +2172,13 @@ static int check_map_func_compatibility(struct 
bpf_verifier_env *env,
if (func_id != BPF_FUNC_sk_select_reuseport)
goto error;
break;
+   case BPF_MAP_TYPE_QUEUE:
+   case BPF_MAP_TYPE_STACK:
+   if (func_id != BPF_FUNC_map_peek_elem &&
+   func_id != BPF_FUNC_map_pop_elem &&
+   func_id != BPF_FUNC_map_push_elem)
+   goto error;
+   break;
default:
break;
}
@@ -2227,6 +2234,13 @@ static int check_map_func_compatibility(struct 
bpf_verifier_env *env,
if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
goto error;
break;
+   case BPF_FUNC_map_peek_elem:
+   case BPF_FUNC_map_pop_elem:
+   case BPF_FUNC_map_push_elem:
+   if (map->map_type != BPF_MAP_TYPE_QUEUE &&
+   map->map_type != BPF_MAP_TYPE_STACK)
+   goto error;
+   break;
default:
break;
}



[RFC PATCH bpf-next v2 0/4] Implement bpf queue/stack maps

2018-08-31 Thread Mauricio Vasquez B
In some applications it is necessary to have a pool of free elements, like for
example the list of free L4 ports in a SNAT.  None of the current maps allow
doing this, as it is not possible to get any element without having the key
it is associated to.

This patchset implements two new kind of eBPF maps: queue and stack.
Those maps provide to eBPF programs the peek, push and pop operations, and for
userspace applications a new bpf_map_lookup_and_delete_elem() is added.

Signed-off-by: Mauricio Vasquez B 

---

I am sending this as an RFC because there is still an issue I am not sure how
to solve.

The queue/stack maps have a linked list for saving the nodes, and a
preallocation schema based on the pcpu_freelist already implemented and used
in the htabmap.  Each time an element is pushed into the map, a node from the
pcpu_freelist is taken and then added to the linked list.

The pop operation takes and *removes* the first node from the linked list, then
it uses *call_rcu* to postpone freeing the node, i.e., the node is only returned
to the pcpu_freelist when the rcu callback is executed.  This is needed because
an element returned by the pop() operation should remain valid for the whole
duration of the eBPF program.

The problem is that elements are not immediately returned to the free list, so
in some cases the push operation could fail because there are not free nodes
in the pcpu_freelist.

The following code snippet exposes that problem.

...
/* Push MAP_SIZE elements */
for (i = 0; i < MAP_SIZE; i++)
assert(bpf_map_update_elem(fd, NULL, [i], 0) == 0);

/* Pop all elements */
for (i = 0; i < MAP_SIZE; i++)
assert(bpf_map_lookup_and_delete_elem(fd, NULL, ) == 0 &&
   val == vals[i]);

  // sleep(1) <-- If I put this sleep, everything works.
/* Push MAP_SIZE elements */
for (i = 0; i < MAP_SIZE; i++)
assert(bpf_map_update_elem(fd, NULL, [i], 0) == 0);
   ^^^
   This fails because there are not available elements in pcpu_freelist
...

I think a possible solution is to oversize the pcpu_freelist (no idea by how
much, maybe double it, or make it 1.5 times the max elements in the map?)
I also have concerns about it: it would waste that memory in many cases, and
it is also probable that it doesn't solve the issue, because that code snippet
is pushing and popping elements too fast, so even if the pcpu_freelist is much
larger, at a certain time instant all the elements could be in use.

Is this really an important issue?
Any idea of how to solve it?

Thanks,
---

Mauricio Vasquez B (4):
  bpf: add bpf queue and stack maps
  bpf: restrict use of peek/push/pop
  Sync uapi/bpf.h to tools/include
  selftests/bpf: add test cases for queue and stack maps


 include/linux/bpf.h|8 
 include/linux/bpf_types.h  |2 
 include/uapi/linux/bpf.h   |   36 ++
 kernel/bpf/Makefile|2 
 kernel/bpf/helpers.c   |   44 ++
 kernel/bpf/queue_stack_maps.c  |  353 
 kernel/bpf/syscall.c   |   96 +
 kernel/bpf/verifier.c  |   20 +
 net/core/filter.c  |6 
 tools/include/uapi/linux/bpf.h |   36 ++
 tools/lib/bpf/bpf.c|   12 +
 tools/lib/bpf/bpf.h|1 
 tools/testing/selftests/bpf/Makefile   |2 
 tools/testing/selftests/bpf/bpf_helpers.h  |7 
 tools/testing/selftests/bpf/test_maps.c|  101 ++
 tools/testing/selftests/bpf/test_progs.c   |   99 ++
 tools/testing/selftests/bpf/test_queue_map.c   |4 
 tools/testing/selftests/bpf/test_queue_stack_map.h |   59 +++
 tools/testing/selftests/bpf/test_stack_map.c   |4 
 19 files changed, 877 insertions(+), 15 deletions(-)
 create mode 100644 kernel/bpf/queue_stack_maps.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
 create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
 create mode 100644 tools/testing/selftests/bpf/test_stack_map.c

--



[PATCH bpf-next 3/3] bpf: add sample for BPF_MAP_TYPE_QUEUE

2018-08-06 Thread Mauricio Vasquez B
The example is made of two parts: an eBPF program that consumes elements
from a FIFO queue and prints them on the screen, and a user space
application that inserts new elements into the queue each time it is
executed.

Signed-off-by: Mauricio Vasquez B 
---
 samples/bpf/.gitignore |1 +
 samples/bpf/Makefile   |3 ++
 samples/bpf/test_map_in_map_user.c |9 +-
 samples/bpf/test_queuemap.sh   |   37 +
 samples/bpf/test_queuemap_kern.c   |   51 +++
 samples/bpf/test_queuemap_user.c   |   53 
 6 files changed, 147 insertions(+), 7 deletions(-)
 create mode 100755 samples/bpf/test_queuemap.sh
 create mode 100644 samples/bpf/test_queuemap_kern.c
 create mode 100644 samples/bpf/test_queuemap_user.c

diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore
index 8ae4940025f8..d7e518c1b3ed 100644
--- a/samples/bpf/.gitignore
+++ b/samples/bpf/.gitignore
@@ -26,6 +26,7 @@ test_lru_dist
 test_map_in_map
 test_overhead
 test_probe_write_user
+test_queuemap
 trace_event
 trace_output
 tracex1
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index f88d5683d6ee..624f4f4b81db 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -53,6 +53,7 @@ hostprogs-y += xdpsock
 hostprogs-y += xdp_fwd
 hostprogs-y += task_fd_query
 hostprogs-y += xdp_sample_pkts
+hostprogs-y += test_queuemap
 
 # Libbpf dependencies
 LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -109,6 +110,7 @@ xdpsock-objs := xdpsock_user.o
 xdp_fwd-objs := xdp_fwd_user.o
 task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
 xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
+test_queuemap-objs := bpf_load.o test_queuemap_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -166,6 +168,7 @@ always += xdpsock_kern.o
 always += xdp_fwd_kern.o
 always += task_fd_query_kern.o
 always += xdp_sample_pkts_kern.o
+always += test_queuemap_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/test_map_in_map_user.c 
b/samples/bpf/test_map_in_map_user.c
index e308858f7bcf..28edac94234e 100644
--- a/samples/bpf/test_map_in_map_user.c
+++ b/samples/bpf/test_map_in_map_user.c
@@ -1,10 +1,5 @@
-/*
- * Copyright (c) 2017 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Politecnico di Torino */
 #include 
 #include 
 #include 
diff --git a/samples/bpf/test_queuemap.sh b/samples/bpf/test_queuemap.sh
new file mode 100755
index ..ed08c1fa8c2c
--- /dev/null
+++ b/samples/bpf/test_queuemap.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+[[ -z $TC ]] && TC='tc'
+[[ -z $IP ]] && IP='ip'
+
+TEST_QUEUE_USER='./test_queuemap'
+TEST_QUEUE_BPF='./test_queuemap_kern.o'
+
+function config {
+   $IP netns add ns1
+   $IP link add ve1 type veth peer name vens1
+   $IP link set dev ve1 up
+   $IP link set dev ve1 mtu 1500
+   $IP link set dev vens1 netns ns1
+
+   $IP -n ns1 link set dev lo up
+   $IP -n ns1 link set dev vens1 up
+   $IP -n ns1 addr add 10.1.1.101/24 dev vens1
+
+   $IP addr add 10.1.1.1/24 dev ve1
+   $TC qdisc add dev ve1 clsact
+   $TC filter add dev ve1 ingress bpf da obj $TEST_QUEUE_BPF sec test_queue
+}
+
+function cleanup {
+   set +e
+   [[ -z $DEBUG ]] || set +x
+   $IP netns delete ns1 >& /dev/null
+   $IP link del ve1 >& /dev/null
+   rm -f /sys/fs/bpf/tc/globals/queue
+   [[ -z $DEBUG ]] || set -x
+   set -e
+}
+
+cleanup
+config
diff --git a/samples/bpf/test_queuemap_kern.c b/samples/bpf/test_queuemap_kern.c
new file mode 100644
index ..2b496dafaffd
--- /dev/null
+++ b/samples/bpf/test_queuemap_kern.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Politecnico di Torino */
+#define KBUILD_MODNAME "foo"
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "bpf_helpers.h"
+
+#define PIN_GLOBAL_NS  2
+
+struct bpf_elf_map {
+   __u32 type;
+   __u32 key_size;
+   __u32 value_size;
+   __u32 max_entries;
+   __u32 flags;
+   __u32 id;
+   __u32 pinning;
+};
+
+/* map #0 */
+struct bpf_elf_map SEC("maps") queue = {
+   .type = BPF_MAP_TYPE_QUEUE,
+   .key_size = 0,
+   .value_size = sizeof(u32),
+   .flags = BPF_F_QUEUE_FIFO,
+   .max_entries = 1024,
+   .pinning = PIN_GLOBAL_NS,
+};
+
+SEC("test_queue")
+int _test_queue(struct __sk_buff *skb)
+{
+   char msg[] = "element is %u\n";
+   char msg_no[] = "there are not elements\n";
+
+   u32 *val = bpf_map_lookup_

[PATCH bpf-next 2/3] selftests/bpf: add test cases for BPF_MAP_TYPE_QUEUE

2018-08-06 Thread Mauricio Vasquez B
Signed-off-by: Mauricio Vasquez B 
---
 tools/include/uapi/linux/bpf.h  |5 ++
 tools/testing/selftests/bpf/test_maps.c |   72 +++
 2 files changed, 77 insertions(+)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0ebaaf7f3568..2c171c40eb45 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -120,6 +120,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_CPUMAP,
BPF_MAP_TYPE_XSKMAP,
BPF_MAP_TYPE_SOCKHASH,
+   BPF_MAP_TYPE_QUEUE,
 };
 
 enum bpf_prog_type {
@@ -255,6 +256,10 @@ enum bpf_attach_type {
 /* Flag for stack_map, store build_id+offset instead of pointer */
 #define BPF_F_STACK_BUILD_ID   (1U << 5)
 
+/* Flags for queue_map, type of queue */
+#define BPF_F_QUEUE_FIFO   (1U << 16)
+#define BPF_F_QUEUE_LIFO   (2U << 16)
+
 enum bpf_stack_build_id_status {
/* user space need an empty entry to identify end of a trace */
BPF_STACK_BUILD_ID_EMPTY = 0,
diff --git a/tools/testing/selftests/bpf/test_maps.c 
b/tools/testing/selftests/bpf/test_maps.c
index 6c253343a6f9..34567b017dbb 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -457,6 +457,77 @@ static void test_devmap(int task, void *data)
close(fd);
 }
 
+static void test_queuemap(int task, void *data)
+{
+   __u32 value;
+   int fd, i;
+
+   /* test FIFO */
+   fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(value), 32,
+   BPF_F_QUEUE_FIFO);
+   if (fd < 0) {
+   printf("Failed to create queuemap '%s'!\n", strerror(errno));
+   exit(1);
+   }
+
+   /* Push 32 elements */
+   for (i = 0; i < 32; i++) {
+   value = 1000 - i * 3;
+   assert(bpf_map_update_elem(fd, NULL, , 0) == 0);
+   }
+
+   /* Check that element cannot be pushed due to max_entries limit */
+   value = 1000;
+   assert(bpf_map_update_elem(fd, NULL, , 0) == -1 &&
+  errno == E2BIG);
+
+   /* Pop all elements */
+   for (i = 0; i < 32; i++)
+   assert(bpf_map_lookup_elem(fd, NULL, ) == 0 &&
+  value == (1000 - i * 3));
+
+   /* Check that there are not elements left */
+   assert(bpf_map_lookup_elem(fd, NULL, ) == -1 && errno == ENOENT);
+
+   assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
+   assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+
+   close(fd);
+
+   /* test LIFO */
+   fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(value), 32,
+   BPF_F_QUEUE_LIFO);
+   if (fd < 0) {
+   printf("Failed to create queuemap '%s'!\n", strerror(errno));
+   exit(1);
+   }
+
+   /* Push 32 elements */
+   for (i = 0; i < 32; i++) {
+   value = 1000 - i * 3;
+   assert(bpf_map_update_elem(fd, NULL, , 0) == 0);
+   }
+
+   /* Check that element cannot be pushed due to max_entries limit */
+   value = 1000;
+   assert(bpf_map_update_elem(fd, NULL, , 0) == -1 &&
+  errno == E2BIG);
+
+   /* Pop all elements */
+   for (i = 31; i >= 0; i--)
+   assert(bpf_map_lookup_elem(fd, NULL, ) == 0 &&
+  value == (1000 - i * 3));
+
+   /* Check that there are not elements left */
+   assert(bpf_map_lookup_elem(fd, NULL, ) == -1 &&
+  errno == ENOENT);
+
+   assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
+   assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+
+   close(fd);
+}
+
 #include 
 #include 
 #include 
@@ -1162,6 +1233,7 @@ static void run_all_tests(void)
test_arraymap_percpu_many_keys();
 
test_devmap(0, NULL);
+   test_queuemap(0, NULL);
test_sockmap(0, NULL);
 
test_map_large();



[PATCH bpf-next 1/3] bpf: add bpf queue map

2018-08-06 Thread Mauricio Vasquez B
Bpf queue implements a LIFO/FIFO data container for eBPF programs.

It allows to push an element to the queue by using the update operation
and to pop an element from the queue by using the lookup operation.

A use case for this is to keep track of a pool of elements, like
network ports in a SNAT.

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf_types.h |1 
 include/uapi/linux/bpf.h  |5 +
 kernel/bpf/Makefile   |2 
 kernel/bpf/queuemap.c |  287 +
 kernel/bpf/syscall.c  |   61 +++---
 kernel/bpf/verifier.c |   16 ++-
 6 files changed, 353 insertions(+), 19 deletions(-)
 create mode 100644 kernel/bpf/queuemap.c

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index c5700c2d5549..6c7a62f3fe43 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -58,3 +58,4 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
 #endif
 #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0ebaaf7f3568..2c171c40eb45 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -120,6 +120,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_CPUMAP,
BPF_MAP_TYPE_XSKMAP,
BPF_MAP_TYPE_SOCKHASH,
+   BPF_MAP_TYPE_QUEUE,
 };
 
 enum bpf_prog_type {
@@ -255,6 +256,10 @@ enum bpf_attach_type {
 /* Flag for stack_map, store build_id+offset instead of pointer */
 #define BPF_F_STACK_BUILD_ID   (1U << 5)
 
+/* Flags for queue_map, type of queue */
+#define BPF_F_QUEUE_FIFO   (1U << 16)
+#define BPF_F_QUEUE_LIFO   (2U << 16)
+
 enum bpf_stack_build_id_status {
/* user space need an empty entry to identify end of a trace */
BPF_STACK_BUILD_ID_EMPTY = 0,
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index f27f5496d6fe..30f02ef66635 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -2,7 +2,7 @@
 obj-y := core.o
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
-obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o 
bpf_lru_list.o lpm_trie.o map_in_map.o
+obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o 
bpf_lru_list.o lpm_trie.o map_in_map.o queuemap.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
 ifeq ($(CONFIG_NET),y)
diff --git a/kernel/bpf/queuemap.c b/kernel/bpf/queuemap.c
new file mode 100644
index ..ab30af43b4cc
--- /dev/null
+++ b/kernel/bpf/queuemap.c
@@ -0,0 +1,287 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * queuemap.c: BPF queue map
+ *
+ * Copyright (c) 2018 Politecnico di Torino
+ */
+#include 
+#include 
+#include 
+#include "percpu_freelist.h"
+
+#define QUEUE_CREATE_FLAG_MASK \
+   (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \
+BPF_F_QUEUE_FIFO | BPF_F_QUEUE_LIFO)
+
+enum queue_type {
+   QUEUE_FIFO = (BPF_F_QUEUE_FIFO >> 16),
+   QUEUE_LIFO = (BPF_F_QUEUE_LIFO >> 16),
+};
+
+struct bpf_queue {
+   struct bpf_map map;
+   struct list_head head;
+   struct pcpu_freelist freelist;
+   void *nodes;
+   enum queue_type type;
+   raw_spinlock_t lock;
+   atomic_t count;
+   u32 node_size;
+};
+
+struct queue_node {
+   struct pcpu_freelist_node fnode;
+   struct bpf_queue *queue;
+   struct list_head list;
+   struct rcu_head rcu;
+   char element[0] __aligned(8);
+};
+
+static bool queue_map_is_prealloc(struct bpf_queue *queue)
+{
+   return !(queue->map.map_flags & BPF_F_NO_PREALLOC);
+}
+
+/* Called from syscall */
+static int queue_map_alloc_check(union bpf_attr *attr)
+{
+   /* check sanity of attributes */
+   if (attr->max_entries == 0 || attr->key_size != 0 ||
+   attr->value_size == 0 || attr->map_flags & ~QUEUE_CREATE_FLAG_MASK)
+   return -EINVAL;
+
+   if ((attr->map_flags >> 16) != QUEUE_FIFO &&
+   (attr->map_flags >> 16) != QUEUE_LIFO) {
+   return -EINVAL;
+   }
+
+   if (attr->value_size > KMALLOC_MAX_SIZE)
+   /* if value_size is bigger, the user space won't be able to
+* access the elements.
+*/
+   return -E2BIG;
+
+   return 0;
+}
+
+static int prealloc_init(struct bpf_queue *queue)
+{
+   u32 node_size = sizeof(struct queue_node) +
+   round_up(queue->map.value_size, 8);
+   u32 num_entries = queue->map.max_entries;
+   int err;
+
+   queue->nodes = bpf_map_area_alloc(node_size * num_entries,
+ queue->map.numa_node);
+   if (!queue->nodes)
+   return -ENOMEM;
+
+   err = pcpu_freelist_init(>freelist);
+   if (err)
+   goto free_nodes;
+
+   pcpu_freelist

[PATCH bpf-next 0/3] Implement bpf map queue

2018-08-06 Thread Mauricio Vasquez B
Bpf queue map is a new kind of map that provides a LIFO/FIFO queue
implementation.

In some applications, like a SNAT, it is necessary to keep track of
a pool of free elements, network ports in this case, and a queue
can be used for that purpose.

Signed-off-by: Mauricio Vasquez B 
---
Mauricio Vasquez B (3):
  bpf: add bpf queue map
  selftests/bpf: add test cases for BPF_MAP_TYPE_QUEUE
  bpf: add sample for BPF_MAP_TYPE_QUEUE


 include/linux/bpf_types.h   |1 
 include/uapi/linux/bpf.h|5 +
 kernel/bpf/Makefile |2 
 kernel/bpf/queuemap.c   |  287 +++
 kernel/bpf/syscall.c|   61 +--
 kernel/bpf/verifier.c   |   16 +-
 samples/bpf/.gitignore  |1 
 samples/bpf/Makefile|3 
 samples/bpf/test_map_in_map_user.c  |9 -
 samples/bpf/test_queuemap.sh|   37 
 samples/bpf/test_queuemap_kern.c|   51 ++
 samples/bpf/test_queuemap_user.c|   53 ++
 tools/include/uapi/linux/bpf.h  |5 +
 tools/testing/selftests/bpf/test_maps.c |   72 
 14 files changed, 577 insertions(+), 26 deletions(-)
 create mode 100644 kernel/bpf/queuemap.c
 create mode 100755 samples/bpf/test_queuemap.sh
 create mode 100644 samples/bpf/test_queuemap_kern.c
 create mode 100644 samples/bpf/test_queuemap_user.c

--



[PATCH bpf] bpf: hash_map: decrement counter on error

2018-06-29 Thread Mauricio Vasquez B
Decrement the number of elements in the map in case the allocation
of a new node fails.

Signed-off-by: Mauricio Vasquez B 
---
 kernel/bpf/hashtab.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3ca2198..513d9df 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -747,13 +747,15 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab 
*htab, void *key,
 * old element will be freed immediately.
 * Otherwise return an error
 */
-   atomic_dec(>count);
-   return ERR_PTR(-E2BIG);
+   l_new = ERR_PTR(-E2BIG);
+   goto dec_count;
}
l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
 htab->map.numa_node);
-   if (!l_new)
-   return ERR_PTR(-ENOMEM);
+   if (!l_new) {
+   l_new = ERR_PTR(-ENOMEM);
+   goto dec_count;
+   }
}
 
memcpy(l_new->key, key, key_size);
@@ -766,7 +768,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab 
*htab, void *key,
  GFP_ATOMIC | __GFP_NOWARN);
if (!pptr) {
kfree(l_new);
-   return ERR_PTR(-ENOMEM);
+   l_new = ERR_PTR(-ENOMEM);
+   goto dec_count;
}
}
 
@@ -780,6 +783,9 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab 
*htab, void *key,
 
l_new->hash = hash;
return l_new;
+dec_count:
+   atomic_dec(>count);
+   return l_new;
 }
 
 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
-- 
2.7.4