Hi, all.

I've researched an issue a bit. And what can we do?
In the case of an "old" kernel 5.4, we need to load RSS eBPF without
BPF_F_MAPPABLE
and use bpf syscall to update the maps. This requires additional capabilities,
and the libvirtd will never give any capabilities to Qemu.
So, the only case for "fallback" is running Qemu manually with
capabilities(or with root) on kernel 5.4.

We can add hack/fallback to RSS ebpf loading routine with additional
checks and modify for BPF_F_MAPPABLE.
And we can add a fallback for mmap/syscall ebpf access.

The problem is that we need kernel 5.5 with BPF_F_MAPPABLE in headers
to compile Qemu with fallback,
or move macro to the Qemu headers.

It can be implemented something like this:
RSS eBPF open/load:
 * open the skeleton.
 * load the skeleton as is - it would fail because of an unknown BPF_F_MAPPABLE.
 * hack/modify map_flags for skeleton and try to reload.
RSS eBPF map update(this is straightforward):
 * check the mem pointer if null, use bpf syscall

The advantage of hacks in Qemu is that we are aware of the eBPF context.
I suggest creating different series of patches that would implement
the hack/fallback,
If we really want to support eBPF on old kernels.

On Wed, Aug 9, 2023 at 5:21 AM Jason Wang <jasow...@redhat.com> wrote:
>
> On Wed, Aug 9, 2023 at 7:15 AM Andrew Melnichenko <and...@daynix.com> wrote:
> >
> > Hi all,
> >
> > On Tue, Aug 8, 2023 at 5:39 AM Jason Wang <jasow...@redhat.com> wrote:
> > >
> > > On Thu, Aug 3, 2023 at 5:01 AM Andrew Melnychenko <and...@daynix.com> 
> > > wrote:
> > > >
> > > > Changed eBPF map updates through mmaped array.
> > > > Mmaped arrays provide direct access to map data.
> > > > It should omit using bpf_map_update_elem() call,
> > > > which may require capabilities that are not present.
> > > >
> > > > Signed-off-by: Andrew Melnychenko <and...@daynix.com>
> > > > ---
> > > >  ebpf/ebpf_rss.c | 117 ++++++++++++++++++++++++++++++++++++++----------
> > > >  ebpf/ebpf_rss.h |   5 +++
> > > >  2 files changed, 99 insertions(+), 23 deletions(-)
> > > >
> > > > diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> > > > index cee658c158b..247f5eee1b6 100644
> > > > --- a/ebpf/ebpf_rss.c
> > > > +++ b/ebpf/ebpf_rss.c
> > > > @@ -27,19 +27,83 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx)
> > > >  {
> > > >      if (ctx != NULL) {
> > > >          ctx->obj = NULL;
> > > > +        ctx->program_fd = -1;
> > > > +        ctx->map_configuration = -1;
> > > > +        ctx->map_toeplitz_key = -1;
> > > > +        ctx->map_indirections_table = -1;
> > > > +
> > > > +        ctx->mmap_configuration = NULL;
> > > > +        ctx->mmap_toeplitz_key = NULL;
> > > > +        ctx->mmap_indirections_table = NULL;
> > > >      }
> > > >  }
> > > >
> > > >  bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
> > > >  {
> > > > -    return ctx != NULL && ctx->obj != NULL;
> > > > +    return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
> > > > +}
> > > > +
> > > > +static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
> > > > +{
> > > > +    if (!ebpf_rss_is_loaded(ctx)) {
> > > > +        return false;
> > > > +    }
> > > > +
> > > > +    ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
> > > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > > +                                   ctx->map_configuration, 0);
> > > > +    if (ctx->mmap_configuration == MAP_FAILED) {
> > > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration 
> > > > array");
> > > > +        return false;
> > > > +    }
> > > > +    ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
> > > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > > +                                   ctx->map_toeplitz_key, 0);
> > > > +    if (ctx->mmap_toeplitz_key == MAP_FAILED) {
> > > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
> > > > +        goto toeplitz_fail;
> > > > +    }
> > > > +    ctx->mmap_indirections_table = mmap(NULL, 
> > > > qemu_real_host_page_size(),
> > > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > > +                                   ctx->map_indirections_table, 0);
> > > > +    if (ctx->mmap_indirections_table == MAP_FAILED) {
> > > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection 
> > > > table");
> > > > +        goto indirection_fail;
> > > > +    }
> > > > +
> > > > +    return true;
> > > > +
> > > > +indirection_fail:
> > > > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > > > +toeplitz_fail:
> > > > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > > > +
> > > > +    ctx->mmap_configuration = NULL;
> > > > +    ctx->mmap_toeplitz_key = NULL;
> > > > +    ctx->mmap_indirections_table = NULL;
> > > > +    return false;
> > > > +}
> > > > +
> > > > +static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
> > > > +{
> > > > +    if (!ebpf_rss_is_loaded(ctx)) {
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
> > > > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > > > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > > > +
> > > > +    ctx->mmap_configuration = NULL;
> > > > +    ctx->mmap_toeplitz_key = NULL;
> > > > +    ctx->mmap_indirections_table = NULL;
> > > >  }
> > > >
> > > >  bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> > > >  {
> > > >      struct rss_bpf *rss_bpf_ctx;
> > > >
> > > > -    if (ctx == NULL) {
> > > > +    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
> > > >          return false;
> > > >      }
> > > >
> > > > @@ -66,10 +130,18 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> > > >      ctx->map_toeplitz_key = bpf_map__fd(
> > > >              rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
> > > >
> > > > +    if (!ebpf_rss_mmap(ctx)) {
> > > > +        goto error;
> > > > +    }
> > > > +
> > > >      return true;
> > > >  error:
> > > >      rss_bpf__destroy(rss_bpf_ctx);
> > > >      ctx->obj = NULL;
> > > > +    ctx->program_fd = -1;
> > > > +    ctx->map_configuration = -1;
> > > > +    ctx->map_toeplitz_key = -1;
> > > > +    ctx->map_indirections_table = -1;
> > > >
> > > >      return false;
> > > >  }
> > > > @@ -77,15 +149,11 @@ error:
> > > >  static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
> > > >                                  struct EBPFRSSConfig *config)
> > > >  {
> > > > -    uint32_t map_key = 0;
> > > > -
> > > >      if (!ebpf_rss_is_loaded(ctx)) {
> > > >          return false;
> > > >      }
> > > > -    if (bpf_map_update_elem(ctx->map_configuration,
> > > > -                            &map_key, config, 0) < 0) {
> > > > -        return false;
> > > > -    }
> > > > +
> > > > +    memcpy(ctx->mmap_configuration, config, sizeof(*config));
> > > >      return true;
> > > >  }
> > > >
> > > > @@ -93,27 +161,19 @@ static bool ebpf_rss_set_indirections_table(struct 
> > > > EBPFRSSContext *ctx,
> > > >                                              uint16_t 
> > > > *indirections_table,
> > > >                                              size_t len)
> > > >  {
> > > > -    uint32_t i = 0;
> > > > -
> > > >      if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
> > > >         len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
> > > >          return false;
> > > >      }
> > > >
> > > > -    for (; i < len; ++i) {
> > > > -        if (bpf_map_update_elem(ctx->map_indirections_table, &i,
> > > > -                                indirections_table + i, 0) < 0) {
> > > > -            return false;
> > > > -        }
> > > > -    }
> > > > +    memcpy(ctx->mmap_indirections_table, indirections_table,
> > > > +            sizeof(*indirections_table) * len);
> > >
> > > As discussed, should we stick the compatibility on the host without
> > > bpf mmap support?
> > >
> > > If we don't, we need at least probe BPF mmap and disable ebpf rss? If
> > > yes, we should track if the map is mmaped and switch between memcpy
> > > and syscall.
> > >
> > > Thanks
> >
> > I've made some tests.
> > I've checked eBPF program on kernels 5.4, 5.5, and 6.3 with libbpf
> > 1.0.1, 1.1.0, and last 1.2.0.
>
> It looks to me we don't check the libbpf version now. Do we need to do
> that (e.g the bpf mmap support for libbpf is not supported from the
> start).
>
> > Overall, eBPF program requires explicit declaration of BPF_F_MAPPABLE 
> > map_flags.
> > Without this flag, the program can be loaded on every tested
> > kernel/libbpf configuration but Qemu can't mmap() the eBPF
> > fds(obviously).
>
> Exactly, and that's why I'm asking whether or not we need to probe mmap here.
>
> This is because, without this series, Qemu can work without BPF mmap
> (but with privilege). And this doesn't work after this series.
>
> > Alternative to mmap() is bpf_map_update_elem() syscall/method which
> > would require capabilities for Qemu.
>
> Yes, but that's a different "problem". I think we should keep Qemu
> work in that setup. (privilege + syscall updating).
>
> > With this flag, kernel 5.4 + libbpf can't load eBPF object.
> > So, compatibility would require 2 different eBPF objects or some kind
> > of hack, also it would require additional capability for Qemu.
> > I don't think that we need checks for disabling eBPF RSS. It wouldn't
> > brake anything on the old kernel and after an update, it should work
> > ok.
>
> Even on old kernel + old libbpf without bpf mmap support?
>
> Thanks
>
> >
> > >
> > > >      return true;
> > > >  }
> > > >
> > > >  static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> > > >                                       uint8_t *toeplitz_key)
> > > >  {
> > > > -    uint32_t map_key = 0;
> > > > -
> > > >      /* prepare toeplitz key */
> > > >      uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
> > > >
> > > > @@ -123,10 +183,7 @@ static bool ebpf_rss_set_toepliz_key(struct 
> > > > EBPFRSSContext *ctx,
> > > >      memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> > > >      *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
> > > >
> > > > -    if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
> > > > -                            0) < 0) {
> > > > -        return false;
> > > > -    }
> > > > +    memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> > > >      return true;
> > > >  }
> > > >
> > > > @@ -160,6 +217,20 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
> > > >          return;
> > > >      }
> > > >
> > > > -    rss_bpf__destroy(ctx->obj);
> > > > +    ebpf_rss_munmap(ctx);
> > > > +
> > > > +    if (ctx->obj) {
> > > > +        rss_bpf__destroy(ctx->obj);
> > > > +    } else {
> > > > +        close(ctx->program_fd);
> > > > +        close(ctx->map_configuration);
> > > > +        close(ctx->map_toeplitz_key);
> > > > +        close(ctx->map_indirections_table);
> > > > +    }
> > > > +
> > > >      ctx->obj = NULL;
> > > > +    ctx->program_fd = -1;
> > > > +    ctx->map_configuration = -1;
> > > > +    ctx->map_toeplitz_key = -1;
> > > > +    ctx->map_indirections_table = -1;
> > > >  }
> > > > diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
> > > > index bf3f2572c7c..ab08a7266d0 100644
> > > > --- a/ebpf/ebpf_rss.h
> > > > +++ b/ebpf/ebpf_rss.h
> > > > @@ -20,6 +20,11 @@ struct EBPFRSSContext {
> > > >      int map_configuration;
> > > >      int map_toeplitz_key;
> > > >      int map_indirections_table;
> > > > +
> > > > +    /* mapped eBPF maps for direct access to omit 
> > > > bpf_map_update_elem() */
> > > > +    void *mmap_configuration;
> > > > +    void *mmap_toeplitz_key;
> > > > +    void *mmap_indirections_table;
> > > >  };
> > > >
> > > >  struct EBPFRSSConfig {
> > > > --
> > > > 2.41.0
> > > >
> > >
> >
>

Reply via email to