On 8/9/18 8:59 AM, Daniel Borkmann wrote:
On 08/09/2018 05:15 PM, Yonghong Song wrote:
On 8/9/18 7:24 AM, Daniel Borkmann wrote:
On 08/09/2018 05:55 AM, Yonghong Song wrote:
On 8/8/18 7:25 PM, Alexei Starovoitov wrote:
On Wed, Aug 08, 2018 at 06:25:19PM -0700, Yonghong Song wrote:
In function map_seq_next() of kernel/bpf/inode.c,
the first key will be the "0" regardless of the map type.
This works for array. But for hash type, if it happens
key "0" is in the map, the bpffs map show will miss
some items if the key "0" is not the first element of
the first bucket.
This patch fixed the issue by guaranteeing to get
the first element, if the seq_show is just started,
by passing NULL pointer key to map_get_next_key() callback.
This way, no missing elements will occur for
bpffs hash table show even if key "0" is in the map.
Currently, map_seq_show_elem callback is only implemented
for arraymap. So the problem actually is not exposed.
The issue is discovered when I tried to implement
map_seq_show_elem for hash maps, and I will have followup
patches for it.
Btw, on that note, I would also prefer if we could decouple
BTF from the map_seq_show_elem() as there is really no reason
to have it on a per-map. I had a patch below which would enable
it for all map types generically, and bpftool works out of the
box for it. Also, array doesn't really have to be 'int' type
enforced as long as it's some data structure with 4 bytes, it's
all fine, so this can be made fully generic (we only eventually
care about the match in size).
I agree with a generic map_check_btf as mostly we only care about size
and this change should enable btftool btf based pretty print for
hash/lru_hash tables.
From 0a8be27cbc2ac0c6fc2632865b5afe37222a1fc7 Mon Sep 17 00:00:00 2001
Message-Id:
<0a8be27cbc2ac0c6fc2632865b5afe37222a1fc7.1533830053.git.dan...@iogearbox.net>
From: Daniel Borkmann <dan...@iogearbox.net>
Date: Thu, 9 Aug 2018 16:50:21 +0200
Subject: [PATCH bpf-next] bpf, btf: enable for all maps
# bpftool m dump id 19
[{
"key": {
"": {
"vip": 0,
"vipv6": []
},
"port": 0,
"family": 0,
"proto": 0
},
"value": {
"flags": 0,
"vip_num": 0
}
}
]
Signed-off-by: Daniel Borkmann <dan...@iogearbox.net>
---
include/linux/bpf.h | 4 +---
kernel/bpf/arraymap.c | 27 ---------------------------
kernel/bpf/inode.c | 3 ++-
kernel/bpf/syscall.c | 24 ++++++++++++++++++++----
4 files changed, 23 insertions(+), 35 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cd8790d..91aa4be 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -48,8 +48,6 @@ struct bpf_map_ops {
u32 (*map_fd_sys_lookup_elem)(void *ptr);
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
struct seq_file *m);
- int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf,
- u32 key_type_id, u32 value_type_id);
};
struct bpf_map {
@@ -118,7 +116,7 @@ static inline bool bpf_map_offload_neutral(const struct
bpf_map *map)
static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
{
- return map->ops->map_seq_show_elem && map->ops->map_check_btf;
+ return map->ops->map_seq_show_elem;
}
extern const struct bpf_map_ops bpf_map_offload_ops;
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 2aa55d030..67f0bdf 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -358,32 +358,6 @@ static void array_map_seq_show_elem(struct bpf_map *map,
void *key,
rcu_read_unlock();
}
-static int array_map_check_btf(const struct bpf_map *map, const struct btf
*btf,
- u32 btf_key_id, u32 btf_value_id)
-{
- const struct btf_type *key_type, *value_type;
- u32 key_size, value_size;
- u32 int_data;
-
- key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
- if (!key_type || BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
- return -EINVAL;
-
- int_data = *(u32 *)(key_type + 1);
- /* bpf array can only take a u32 key. This check makes
- * sure that the btf matches the attr used during map_create.
- */
- if (BTF_INT_BITS(int_data) != 32 || key_size != 4 ||
- BTF_INT_OFFSET(int_data))
- return -EINVAL;
-
- value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
- if (!value_type || value_size != map->value_size)
- return -EINVAL;
-
- return 0;
-}
-
const struct bpf_map_ops array_map_ops = {
.map_alloc_check = array_map_alloc_check,
.map_alloc = array_map_alloc,
@@ -394,7 +368,6 @@ const struct bpf_map_ops array_map_ops = {
.map_delete_elem = array_map_delete_elem,
.map_gen_lookup = array_map_gen_lookup,
.map_seq_show_elem = array_map_seq_show_elem,
- .map_check_btf = array_map_check_btf,
};
const struct bpf_map_ops percpu_array_map_ops = {
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 76efe9a..400f27d 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -332,7 +332,8 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode,
void *arg)
struct bpf_map *map = arg;
return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops,
- map->btf ? &bpffs_map_fops : &bpffs_obj_fops);
+ bpf_map_support_seq_show(map) ?
+ &bpffs_map_fops : &bpffs_obj_fops);
There are an issue here, the condition bpf_map_support_seq_show(map) may
not be enough since the map specific implementation assumes availability
of btf and proper map key/value btf_type_id's.
We can either use
map->btf && bpf_map_support_seq_show(map)
condition here, or check map->btf in each individual implementation
of map_support_seq_show().
}
static struct dentry *
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5af4e9e..0b6f6e8 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -455,6 +455,23 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
return 0;
}
+static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
+ u32 btf_key_id, u32 btf_value_id)
+{
+ const struct btf_type *key_type, *value_type;
+ u32 key_size, value_size;
+
+ key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
+ if (!key_type || key_size != map->key_size)
+ return -EINVAL;
+
+ value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
+ if (!value_type || value_size != map->value_size)
+ return -EINVAL;
+
+ return 0;
+}
+
#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
/* called via syscall */
static int map_create(union bpf_attr *attr)
@@ -489,8 +506,7 @@ static int map_create(union bpf_attr *attr)
atomic_set(&map->refcnt, 1);
atomic_set(&map->usercnt, 1);
- if (bpf_map_support_seq_show(map) &&
- (attr->btf_key_type_id || attr->btf_value_type_id)) {
+ if (attr->btf_key_type_id || attr->btf_value_type_id) {
struct btf *btf;
if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
@@ -504,8 +520,8 @@ static int map_create(union bpf_attr *attr)
goto free_map_nouncharge;
}
- err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id,
- attr->btf_value_type_id);
+ err = map_check_btf(map, btf, attr->btf_key_type_id,
+ attr->btf_value_type_id);
if (err) {
btf_put(btf);
goto free_map_nouncharge;