When running bpf_map_lookup on percpu elements, the bytes copied to
userspace depends on num_possible_cpus() * value_size, which could
potentially be larger than memory allocated from user, which depends
on sysconf(_SC_NPROCESSORS_CONF) to get the current cpu num.  As a
result, the inconsistency might corrupt the user's stack.

The fact that sysconf(_SC_NPROCESSORS_CONF) != num_possible_cpu()
happens when cpu hotadd is enabled.  For example, in Fusion when
setting vcpu.hotadd = "TRUE" or in KVM, setting
  ./qemu-system-x86_64 -smp 2, maxcpus=4 ...
the num_possible_cpu() will be 4 and sysconf() will be 2[1].
Currently the any percpu map lookup suffers this issue, try
samples/bpf/test_maps.c or tracex3.c.

Th RFC patch adds additional 'size' param from userspace so that
kernel knows the maximum memory it should copy to the user.

[1] https://www.mail-archive.com/netdev@vger.kernel.org/msg121183.html

Signed-off-by: William Tu <u9012...@gmail.com>
---
 include/uapi/linux/bpf.h       |  5 ++++-
 kernel/bpf/syscall.c           |  5 +++--
 samples/bpf/fds_example.c      |  2 +-
 samples/bpf/libbpf.c           |  3 ++-
 samples/bpf/libbpf.h           |  2 +-
 samples/bpf/test_maps.c        | 30 +++++++++++++++---------------
 tools/include/uapi/linux/bpf.h |  5 ++++-
 7 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f09c70b..fa0c40b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -123,7 +123,10 @@ union bpf_attr {
                        __aligned_u64 value;
                        __aligned_u64 next_key;
                };
-               __u64           flags;
+               union {
+                       __u64           flags;
+                       __u32           size; /* number of bytes allocated in 
userspace */
+               };
        };
 
        struct { /* anonymous struct used by BPF_PROG_LOAD command */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 228f962..be211ea 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -264,13 +264,14 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void 
*key, void *value)
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
+#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD size
 
 static int map_lookup_elem(union bpf_attr *attr)
 {
        void __user *ukey = u64_to_ptr(attr->key);
        void __user *uvalue = u64_to_ptr(attr->value);
        int ufd = attr->map_fd;
+       u32 usize = attr->size;
        struct bpf_map *map;
        void *key, *value, *ptr;
        u32 value_size;
@@ -324,7 +325,7 @@ static int map_lookup_elem(union bpf_attr *attr)
                goto free_value;
 
        err = -EFAULT;
-       if (copy_to_user(uvalue, value, value_size) != 0)
+       if (copy_to_user(uvalue, value, min_t(u32, usize, value_size)) != 0)
                goto free_value;
 
        err = 0;
diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c
index 625e797..5b833d8 100644
--- a/samples/bpf/fds_example.c
+++ b/samples/bpf/fds_example.c
@@ -88,7 +88,7 @@ static int bpf_do_map(const char *file, uint32_t flags, 
uint32_t key,
                       ret, strerror(errno));
                assert(ret == 0);
        } else if (flags & BPF_F_KEY) {
-               ret = bpf_lookup_elem(fd, &key, &value);
+               ret = bpf_lookup_elem(fd, &key, &value, sizeof(value));
                printf("bpf: fd:%d l->(%u):%u ret:(%d,%s)\n", fd, key, value,
                       ret, strerror(errno));
                assert(ret == 0);
diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c
index 9969e35..9f0a1c3 100644
--- a/samples/bpf/libbpf.c
+++ b/samples/bpf/libbpf.c
@@ -44,12 +44,13 @@ int bpf_update_elem(int fd, void *key, void *value, 
unsigned long long flags)
        return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
 }
 
-int bpf_lookup_elem(int fd, void *key, void *value)
+int bpf_lookup_elem(int fd, void *key, void *value, int size)
 {
        union bpf_attr attr = {
                .map_fd = fd,
                .key = ptr_to_u64(key),
                .value = ptr_to_u64(value),
+               .size = size,
        };
 
        return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
index ac6edb6..b911185 100644
--- a/samples/bpf/libbpf.h
+++ b/samples/bpf/libbpf.h
@@ -7,7 +7,7 @@ struct bpf_insn;
 int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
                   int max_entries, int map_flags);
 int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags);
-int bpf_lookup_elem(int fd, void *key, void *value);
+int bpf_lookup_elem(int fd, void *key, void *value, int size);
 int bpf_delete_elem(int fd, void *key);
 int bpf_get_next_key(int fd, void *key, void *next_key);
 
diff --git a/samples/bpf/test_maps.c b/samples/bpf/test_maps.c
index cce2b59..a6a8fbe 100644
--- a/samples/bpf/test_maps.c
+++ b/samples/bpf/test_maps.c
@@ -47,11 +47,11 @@ static void test_hashmap_sanity(int i, void *data)
        assert(bpf_update_elem(map_fd, &key, &value, -1) == -1 && errno == 
EINVAL);
 
        /* check that key=1 can be found */
-       assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 1234);
+       assert(bpf_lookup_elem(map_fd, &key, &value, sizeof(value)) == 0 && 
value == 1234);
 
        key = 2;
        /* check that key=2 is not found */
-       assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT);
+       assert(bpf_lookup_elem(map_fd, &key, &value, sizeof(value)) == -1 && 
errno == ENOENT);
 
        /* BPF_EXIST means: update existing element */
        assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == -1 &&
@@ -139,11 +139,11 @@ static void test_percpu_hashmap_sanity(int task, void 
*data)
         * was run from a different cpu.
         */
        value[0] = 1;
-       assert(bpf_lookup_elem(map_fd, &key, value) == 0 && value[0] == 100);
+       assert(bpf_lookup_elem(map_fd, &key, value, sizeof(value)) == 0 && 
value[0] == 100);
 
        key = 2;
        /* check that key=2 is not found */
-       assert(bpf_lookup_elem(map_fd, &key, value) == -1 && errno == ENOENT);
+       assert(bpf_lookup_elem(map_fd, &key, value, sizeof(value)) == -1 && 
errno == ENOENT);
 
        /* BPF_EXIST means: update existing element */
        assert(bpf_update_elem(map_fd, &key, value, BPF_EXIST) == -1 &&
@@ -170,7 +170,7 @@ static void test_percpu_hashmap_sanity(int task, void *data)
                assert((expected_key_mask & next_key) == next_key);
                expected_key_mask &= ~next_key;
 
-               assert(bpf_lookup_elem(map_fd, &next_key, value) == 0);
+               assert(bpf_lookup_elem(map_fd, &next_key, value, sizeof(value)) 
== 0);
                for (i = 0; i < nr_cpus; i++)
                        assert(value[i] == i + 100);
 
@@ -218,11 +218,11 @@ static void test_arraymap_sanity(int i, void *data)
               errno == EEXIST);
 
        /* check that key=1 can be found */
-       assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 1234);
+       assert(bpf_lookup_elem(map_fd, &key, &value, sizeof(value)) == 0 && 
value == 1234);
 
        key = 0;
        /* check that key=0 is also found and zero initialized */
-       assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 0);
+       assert(bpf_lookup_elem(map_fd, &key, &value, sizeof(value)) == 0 && 
value == 0);
 
 
        /* key=0 and key=1 were inserted, check that key=2 cannot be inserted
@@ -233,7 +233,7 @@ static void test_arraymap_sanity(int i, void *data)
               errno == E2BIG);
 
        /* check that key = 2 doesn't exist */
-       assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT);
+       assert(bpf_lookup_elem(map_fd, &key, &value, sizeof(value)) == -1 && 
errno == ENOENT);
 
        /* iterate over two elements */
        assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 &&
@@ -274,7 +274,7 @@ static void test_percpu_arraymap_many_keys(void)
        for (key = 0; key < nr_keys; key++) {
                for (i = 0; i < nr_cpus; i++)
                        values[i] = 0;
-               assert(bpf_lookup_elem(map_fd, &key, values) == 0);
+               assert(bpf_lookup_elem(map_fd, &key, values, sizeof(values)) == 
0);
                for (i = 0; i < nr_cpus; i++)
                        assert(values[i] == i + 10);
        }
@@ -307,11 +307,11 @@ static void test_percpu_arraymap_sanity(int i, void *data)
               errno == EEXIST);
 
        /* check that key=1 can be found */
-       assert(bpf_lookup_elem(map_fd, &key, values) == 0 && values[0] == 100);
+       assert(bpf_lookup_elem(map_fd, &key, values, sizeof(values)) == 0 && 
values[0] == 100);
 
        key = 0;
        /* check that key=0 is also found and zero initialized */
-       assert(bpf_lookup_elem(map_fd, &key, values) == 0 &&
+       assert(bpf_lookup_elem(map_fd, &key, values, sizeof(values)) == 0 &&
               values[0] == 0 && values[nr_cpus - 1] == 0);
 
 
@@ -321,7 +321,7 @@ static void test_percpu_arraymap_sanity(int i, void *data)
               errno == E2BIG);
 
        /* check that key = 2 doesn't exist */
-       assert(bpf_lookup_elem(map_fd, &key, values) == -1 && errno == ENOENT);
+       assert(bpf_lookup_elem(map_fd, &key, values, sizeof(values)) == -1 && 
errno == ENOENT);
 
        /* iterate over two elements */
        assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 &&
@@ -371,9 +371,9 @@ static void test_map_large(void)
        assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT);
 
        key.c = 0;
-       assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 0);
+       assert(bpf_lookup_elem(map_fd, &key, &value, sizeof(value)) == 0 && 
value == 0);
        key.a = 1;
-       assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT);
+       assert(bpf_lookup_elem(map_fd, &key, &value, sizeof(value)) == -1 && 
errno == ENOENT);
 
        close(map_fd);
 }
@@ -466,7 +466,7 @@ static void test_map_parallel(void)
        /* another check for all elements */
        for (i = 0; i < MAP_SIZE; i++) {
                key = MAP_SIZE - i - 1;
-               assert(bpf_lookup_elem(map_fd, &key, &value) == 0 &&
+               assert(bpf_lookup_elem(map_fd, &key, &value, sizeof(value)) == 
0 &&
                       value == key);
        }
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9e5fc16..719fa25 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -122,7 +122,10 @@ union bpf_attr {
                        __aligned_u64 value;
                        __aligned_u64 next_key;
                };
-               __u64           flags;
+               union {
+                       __u64           flags;
+                       __u32           size; /* number of bytes allocated in 
userspace */
+               };
        };
 
        struct { /* anonymous struct used by BPF_PROG_LOAD command */
-- 
2.5.0

Reply via email to