On Mon, Aug 14, 2023 at 4:36 PM Andrew Melnichenko <and...@daynix.com> wrote: > > Hi, all. > > I've researched an issue a bit. And what can we do? > In the case of an "old" kernel 5.4, we need to load RSS eBPF without > BPF_F_MAPPABLE > and use bpf syscall to update the maps. This requires additional capabilities, > and the libvirtd will never give any capabilities to Qemu. > So, the only case for "fallback" is running Qemu manually with > capabilities(or with root) on kernel 5.4. > > We can add hack/fallback to RSS ebpf loading routine with additional > checks and modify for BPF_F_MAPPABLE. > And we can add a fallback for mmap/syscall ebpf access. > > The problem is that we need kernel 5.5 with BPF_F_MAPPABLE in headers > to compile Qemu with fallback, > or move macro to the Qemu headers. > > It can be implemented something like this: > RSS eBPF open/load: > * open the skeleton. > * load the skeleton as is - it would fail because of an unknown > BPF_F_MAPPABLE. > * hack/modify map_flags for skeleton and try to reload. > RSS eBPF map update(this is straightforward): > * check the mem pointer if null, use bpf syscall > > The advantage of hacks in Qemu is that we are aware of the eBPF context. > I suggest creating different series of patches that would implement > the hack/fallback, > If we really want to support eBPF on old kernels.
So I think the simplest way is to disable eBPF RSS support on old kernels? (e.g during the configure) Thanks > > On Wed, Aug 9, 2023 at 5:21 AM Jason Wang <jasow...@redhat.com> wrote: > > > > On Wed, Aug 9, 2023 at 7:15 AM Andrew Melnichenko <and...@daynix.com> wrote: > > > > > > Hi all, > > > > > > On Tue, Aug 8, 2023 at 5:39 AM Jason Wang <jasow...@redhat.com> wrote: > > > > > > > > On Thu, Aug 3, 2023 at 5:01 AM Andrew Melnychenko <and...@daynix.com> > > > > wrote: > > > > > > > > > > Changed eBPF map updates through mmaped array. > > > > > Mmaped arrays provide direct access to map data. > > > > > It should omit using bpf_map_update_elem() call, > > > > > which may require capabilities that are not present. > > > > > > > > > > Signed-off-by: Andrew Melnychenko <and...@daynix.com> > > > > > --- > > > > > ebpf/ebpf_rss.c | 117 > > > > > ++++++++++++++++++++++++++++++++++++++---------- > > > > > ebpf/ebpf_rss.h | 5 +++ > > > > > 2 files changed, 99 insertions(+), 23 deletions(-) > > > > > > > > > > diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c > > > > > index cee658c158b..247f5eee1b6 100644 > > > > > --- a/ebpf/ebpf_rss.c > > > > > +++ b/ebpf/ebpf_rss.c > > > > > @@ -27,19 +27,83 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx) > > > > > { > > > > > if (ctx != NULL) { > > > > > ctx->obj = NULL; > > > > > + ctx->program_fd = -1; > > > > > + ctx->map_configuration = -1; > > > > > + ctx->map_toeplitz_key = -1; > > > > > + ctx->map_indirections_table = -1; > > > > > + > > > > > + ctx->mmap_configuration = NULL; > > > > > + ctx->mmap_toeplitz_key = NULL; > > > > > + ctx->mmap_indirections_table = NULL; > > > > > } > > > > > } > > > > > > > > > > bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx) > > > > > { > > > > > - return ctx != NULL && ctx->obj != NULL; > > > > > + return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != > > > > > -1); > > > > > +} > > > > > + > > > > > +static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx) > > > > > +{ > > > > > + if (!ebpf_rss_is_loaded(ctx)) { > > > > > + return false; > > > > > + } > > > > > + > > > > > + ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(), > > > > > + PROT_READ | PROT_WRITE, > > > > > MAP_SHARED, > > > > > + ctx->map_configuration, 0); > > > > > + if (ctx->mmap_configuration == MAP_FAILED) { > > > > > + trace_ebpf_error("eBPF RSS", "can not mmap eBPF > > > > > configuration array"); > > > > > + return false; > > > > > + } > > > > > + ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(), > > > > > + PROT_READ | PROT_WRITE, > > > > > MAP_SHARED, > > > > > + ctx->map_toeplitz_key, 0); > > > > > + if (ctx->mmap_toeplitz_key == MAP_FAILED) { > > > > > + trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz > > > > > key"); > > > > > + goto toeplitz_fail; > > > > > + } > > > > > + ctx->mmap_indirections_table = mmap(NULL, > > > > > qemu_real_host_page_size(), > > > > > + PROT_READ | PROT_WRITE, > > > > > MAP_SHARED, > > > > > + ctx->map_indirections_table, 0); > > > > > + if (ctx->mmap_indirections_table == MAP_FAILED) { > > > > > + trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection > > > > > table"); > > > > > + goto indirection_fail; > > > > > + } > > > > > + > > > > > + return true; > > > > > + > > > > > +indirection_fail: > > > > > + munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size()); > > > > > +toeplitz_fail: > > > > > + munmap(ctx->mmap_configuration, qemu_real_host_page_size()); > > > > > + > > > > > + ctx->mmap_configuration = NULL; > > > > > + ctx->mmap_toeplitz_key = NULL; > > > > > + ctx->mmap_indirections_table = NULL; > > > > > + return false; > > > > > +} > > > > > + > > > > > +static void ebpf_rss_munmap(struct EBPFRSSContext *ctx) > > > > > +{ > > > > > + if (!ebpf_rss_is_loaded(ctx)) { > > > > > + return; > > > > > + } > > > > > + > > > > > + munmap(ctx->mmap_indirections_table, qemu_real_host_page_size()); > > > > > + munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size()); > > > > > + munmap(ctx->mmap_configuration, qemu_real_host_page_size()); > > > > > + > > > > > + ctx->mmap_configuration = NULL; > > > > > + ctx->mmap_toeplitz_key = NULL; > > > > > + ctx->mmap_indirections_table = NULL; > > > > > } > > > > > > > > > > bool ebpf_rss_load(struct EBPFRSSContext *ctx) > > > > > { > > > > > struct rss_bpf *rss_bpf_ctx; > > > > > > > > > > - if (ctx == NULL) { > > > > > + if (ctx == NULL || ebpf_rss_is_loaded(ctx)) { > > > > > return false; > > > > > } > > > > > > > > > > @@ -66,10 +130,18 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx) > > > > > ctx->map_toeplitz_key = bpf_map__fd( > > > > > rss_bpf_ctx->maps.tap_rss_map_toeplitz_key); > > > > > > > > > > + if (!ebpf_rss_mmap(ctx)) { > > > > > + goto error; > > > > > + } > > > > > + > > > > > return true; > > > > > error: > > > > > rss_bpf__destroy(rss_bpf_ctx); > > > > > ctx->obj = NULL; > > > > > + ctx->program_fd = -1; > > > > > + ctx->map_configuration = -1; > > > > > + ctx->map_toeplitz_key = -1; > > > > > + ctx->map_indirections_table = -1; > > > > > > > > > > return false; > > > > > } > > > > > @@ -77,15 +149,11 @@ error: > > > > > static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx, > > > > > struct EBPFRSSConfig *config) > > > > > { > > > > > - uint32_t map_key = 0; > > > > > - > > > > > if (!ebpf_rss_is_loaded(ctx)) { > > > > > return false; > > > > > } > > > > > - if (bpf_map_update_elem(ctx->map_configuration, > > > > > - &map_key, config, 0) < 0) { > > > > > - return false; > > > > > - } > > > > > + > > > > > + memcpy(ctx->mmap_configuration, config, sizeof(*config)); > > > > > return true; > > > > > } > > > > > > > > > > @@ -93,27 +161,19 @@ static bool > > > > > ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx, > > > > > uint16_t > > > > > *indirections_table, > > > > > size_t len) > > > > > { > > > > > - uint32_t i = 0; > > > > > - > > > > > if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL || > > > > > len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { > > > > > return false; > > > > > } > > > > > > > > > > - for (; i < len; ++i) { > > > > > - if (bpf_map_update_elem(ctx->map_indirections_table, &i, > > > > > - indirections_table + i, 0) < 0) { > > > > > - return false; > > > > > - } > > > > > - } > > > > > + memcpy(ctx->mmap_indirections_table, indirections_table, > > > > > + sizeof(*indirections_table) * len); > > > > > > > > As discussed, should we stick the compatibility on the host without > > > > bpf mmap support? > > > > > > > > If we don't, we need at least probe BPF mmap and disable ebpf rss? If > > > > yes, we should track if the map is mmaped and switch between memcpy > > > > and syscall. > > > > > > > > Thanks > > > > > > I've made some tests. > > > I've checked eBPF program on kernels 5.4, 5.5, and 6.3 with libbpf > > > 1.0.1, 1.1.0, and last 1.2.0. > > > > It looks to me we don't check the libbpf version now. Do we need to do > > that (e.g the bpf mmap support for libbpf is not supported from the > > start). > > > > > Overall, eBPF program requires explicit declaration of BPF_F_MAPPABLE > > > map_flags. > > > Without this flag, the program can be loaded on every tested > > > kernel/libbpf configuration but Qemu can't mmap() the eBPF > > > fds(obviously). > > > > Exactly, and that's why I'm asking whether or not we need to probe mmap > > here. > > > > This is because, without this series, Qemu can work without BPF mmap > > (but with privilege). And this doesn't work after this series. > > > > > Alternative to mmap() is bpf_map_update_elem() syscall/method which > > > would require capabilities for Qemu. > > > > Yes, but that's a different "problem". I think we should keep Qemu > > work in that setup. (privilege + syscall updating). > > > > > With this flag, kernel 5.4 + libbpf can't load eBPF object. > > > So, compatibility would require 2 different eBPF objects or some kind > > > of hack, also it would require additional capability for Qemu. > > > I don't think that we need checks for disabling eBPF RSS. It wouldn't > > > brake anything on the old kernel and after an update, it should work > > > ok. > > > > Even on old kernel + old libbpf without bpf mmap support? > > > > Thanks > > > > > > > > > > > > > > return true; > > > > > } > > > > > > > > > > static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx, > > > > > uint8_t *toeplitz_key) > > > > > { > > > > > - uint32_t map_key = 0; > > > > > - > > > > > /* prepare toeplitz key */ > > > > > uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {}; > > > > > > > > > > @@ -123,10 +183,7 @@ static bool ebpf_rss_set_toepliz_key(struct > > > > > EBPFRSSContext *ctx, > > > > > memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE); > > > > > *(uint32_t *)toe = ntohl(*(uint32_t *)toe); > > > > > > > > > > - if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe, > > > > > - 0) < 0) { > > > > > - return false; > > > > > - } > > > > > + memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE); > > > > > return true; > > > > > } > > > > > > > > > > @@ -160,6 +217,20 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx) > > > > > return; > > > > > } > > > > > > > > > > - rss_bpf__destroy(ctx->obj); > > > > > + ebpf_rss_munmap(ctx); > > > > > + > > > > > + if (ctx->obj) { > > > > > + rss_bpf__destroy(ctx->obj); > > > > > + } else { > > > > > + close(ctx->program_fd); > > > > > + close(ctx->map_configuration); > > > > > + close(ctx->map_toeplitz_key); > > > > > + close(ctx->map_indirections_table); > > > > > + } > > > > > + > > > > > ctx->obj = NULL; > > > > > + ctx->program_fd = -1; > > > > > + ctx->map_configuration = -1; > > > > > + ctx->map_toeplitz_key = -1; > > > > > + ctx->map_indirections_table = -1; > > > > > } > > > > > diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h > > > > > index bf3f2572c7c..ab08a7266d0 100644 > > > > > --- a/ebpf/ebpf_rss.h > > > > > +++ b/ebpf/ebpf_rss.h > > > > > @@ -20,6 +20,11 @@ struct EBPFRSSContext { > > > > > int map_configuration; > > > > > int map_toeplitz_key; > > > > > int map_indirections_table; > > > > > + > > > > > + /* mapped eBPF maps for direct access to omit > > > > > bpf_map_update_elem() */ > > > > > + void *mmap_configuration; > > > > > + void *mmap_toeplitz_key; > > > > > + void *mmap_indirections_table; > > > > > }; > > > > > > > > > > struct EBPFRSSConfig { > > > > > -- > > > > > 2.41.0 > > > > > > > > > > > > > > >