On Wed, Aug 9, 2023 at 7:15 AM Andrew Melnichenko <and...@daynix.com> wrote:
>
> Hi all,
>
> On Tue, Aug 8, 2023 at 5:39 AM Jason Wang <jasow...@redhat.com> wrote:
> >
> > On Thu, Aug 3, 2023 at 5:01 AM Andrew Melnychenko <and...@daynix.com> wrote:
> > >
> > > > Changed eBPF map updates to go through mmapped arrays.
> > > > Mmapped arrays provide direct access to map data.
> > > > This avoids the bpf_map_update_elem() call,
> > > > which may require capabilities that are not present.
> > >
> > > Signed-off-by: Andrew Melnychenko <and...@daynix.com>
> > > ---
> > >  ebpf/ebpf_rss.c | 117 ++++++++++++++++++++++++++++++++++++++----------
> > >  ebpf/ebpf_rss.h |   5 +++
> > >  2 files changed, 99 insertions(+), 23 deletions(-)
> > >
> > > diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> > > index cee658c158b..247f5eee1b6 100644
> > > --- a/ebpf/ebpf_rss.c
> > > +++ b/ebpf/ebpf_rss.c
> > > @@ -27,19 +27,83 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx)
> > >  {
> > >      if (ctx != NULL) {
> > >          ctx->obj = NULL;
> > > +        ctx->program_fd = -1;
> > > +        ctx->map_configuration = -1;
> > > +        ctx->map_toeplitz_key = -1;
> > > +        ctx->map_indirections_table = -1;
> > > +
> > > +        ctx->mmap_configuration = NULL;
> > > +        ctx->mmap_toeplitz_key = NULL;
> > > +        ctx->mmap_indirections_table = NULL;
> > >      }
> > >  }
> > >
> > >  bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
> > >  {
> > > -    return ctx != NULL && ctx->obj != NULL;
> > > +    return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
> > > +}
> > > +
> > > +static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
> > > +{
> > > +    if (!ebpf_rss_is_loaded(ctx)) {
> > > +        return false;
> > > +    }
> > > +
> > > +    ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
> > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > +                                   ctx->map_configuration, 0);
> > > +    if (ctx->mmap_configuration == MAP_FAILED) {
> > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration 
> > > array");
> > > +        return false;
> > > +    }
> > > +    ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
> > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > +                                   ctx->map_toeplitz_key, 0);
> > > +    if (ctx->mmap_toeplitz_key == MAP_FAILED) {
> > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
> > > +        goto toeplitz_fail;
> > > +    }
> > > +    ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(),
> > > +                                   PROT_READ | PROT_WRITE, MAP_SHARED,
> > > +                                   ctx->map_indirections_table, 0);
> > > +    if (ctx->mmap_indirections_table == MAP_FAILED) {
> > > +        trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection 
> > > table");
> > > +        goto indirection_fail;
> > > +    }
> > > +
> > > +    return true;
> > > +
> > > +indirection_fail:
> > > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > > +toeplitz_fail:
> > > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > > +
> > > +    ctx->mmap_configuration = NULL;
> > > +    ctx->mmap_toeplitz_key = NULL;
> > > +    ctx->mmap_indirections_table = NULL;
> > > +    return false;
> > > +}
> > > +
> > > +static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
> > > +{
> > > +    if (!ebpf_rss_is_loaded(ctx)) {
> > > +        return;
> > > +    }
> > > +
> > > +    munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
> > > +    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > > +    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > > +
> > > +    ctx->mmap_configuration = NULL;
> > > +    ctx->mmap_toeplitz_key = NULL;
> > > +    ctx->mmap_indirections_table = NULL;
> > >  }
> > >
> > >  bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> > >  {
> > >      struct rss_bpf *rss_bpf_ctx;
> > >
> > > -    if (ctx == NULL) {
> > > +    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
> > >          return false;
> > >      }
> > >
> > > @@ -66,10 +130,18 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> > >      ctx->map_toeplitz_key = bpf_map__fd(
> > >              rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
> > >
> > > +    if (!ebpf_rss_mmap(ctx)) {
> > > +        goto error;
> > > +    }
> > > +
> > >      return true;
> > >  error:
> > >      rss_bpf__destroy(rss_bpf_ctx);
> > >      ctx->obj = NULL;
> > > +    ctx->program_fd = -1;
> > > +    ctx->map_configuration = -1;
> > > +    ctx->map_toeplitz_key = -1;
> > > +    ctx->map_indirections_table = -1;
> > >
> > >      return false;
> > >  }
> > > @@ -77,15 +149,11 @@ error:
> > >  static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
> > >                                  struct EBPFRSSConfig *config)
> > >  {
> > > -    uint32_t map_key = 0;
> > > -
> > >      if (!ebpf_rss_is_loaded(ctx)) {
> > >          return false;
> > >      }
> > > -    if (bpf_map_update_elem(ctx->map_configuration,
> > > -                            &map_key, config, 0) < 0) {
> > > -        return false;
> > > -    }
> > > +
> > > +    memcpy(ctx->mmap_configuration, config, sizeof(*config));
> > >      return true;
> > >  }
> > >
> > > @@ -93,27 +161,19 @@ static bool ebpf_rss_set_indirections_table(struct 
> > > EBPFRSSContext *ctx,
> > >                                              uint16_t *indirections_table,
> > >                                              size_t len)
> > >  {
> > > -    uint32_t i = 0;
> > > -
> > >      if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
> > >         len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
> > >          return false;
> > >      }
> > >
> > > -    for (; i < len; ++i) {
> > > -        if (bpf_map_update_elem(ctx->map_indirections_table, &i,
> > > -                                indirections_table + i, 0) < 0) {
> > > -            return false;
> > > -        }
> > > -    }
> > > +    memcpy(ctx->mmap_indirections_table, indirections_table,
> > > +            sizeof(*indirections_table) * len);
> >
> > > As discussed, should we keep compatibility with hosts that lack
> > > BPF mmap support?
> >
> > > If we don't, do we at least need to probe for BPF mmap and disable
> > > eBPF RSS? If we do keep compatibility, we should track whether the
> > > map is mmapped and switch between memcpy and the syscall.
> >
> > Thanks
>
> > I've run some tests.
> > I've checked the eBPF program on kernels 5.4, 5.5, and 6.3 with libbpf
> > 1.0.1, 1.1.0, and the latest 1.2.0.

It looks to me like we don't check the libbpf version now. Do we need to
do that (e.g., because libbpf has not supported BPF mmap from the
start)?

> > Overall, the eBPF program requires an explicit declaration of
> > BPF_F_MMAPABLE in map_flags.
> > Without this flag, the program can be loaded on every tested
> > kernel/libbpf configuration, but QEMU can't mmap() the eBPF
> > fds (obviously).

Exactly, and that's why I'm asking whether or not we need to probe mmap here.

This is because, without this series, QEMU can work without BPF mmap
(but with privileges), and that no longer works after this series.

> > The alternative to mmap() is the bpf_map_update_elem() syscall/method,
> > which would require capabilities for QEMU.

Yes, but that's a different "problem". I think we should keep QEMU
working in that setup (privileges + updating via the syscall).

> > With this flag, kernel 5.4 + libbpf can't load the eBPF object.
> > So, compatibility would require 2 different eBPF objects or some kind
> > of hack; it would also require an additional capability for QEMU.
> > I don't think that we need checks for disabling eBPF RSS. It wouldn't
> > break anything on the old kernel, and after an update it should work
> > OK.

Even on old kernel + old libbpf without bpf mmap support?

Thanks

>
> >
> > >      return true;
> > >  }
> > >
> > >  static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> > >                                       uint8_t *toeplitz_key)
> > >  {
> > > -    uint32_t map_key = 0;
> > > -
> > >      /* prepare toeplitz key */
> > >      uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
> > >
> > > @@ -123,10 +183,7 @@ static bool ebpf_rss_set_toepliz_key(struct 
> > > EBPFRSSContext *ctx,
> > >      memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> > >      *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
> > >
> > > -    if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
> > > -                            0) < 0) {
> > > -        return false;
> > > -    }
> > > +    memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE);
> > >      return true;
> > >  }
> > >
> > > @@ -160,6 +217,20 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
> > >          return;
> > >      }
> > >
> > > -    rss_bpf__destroy(ctx->obj);
> > > +    ebpf_rss_munmap(ctx);
> > > +
> > > +    if (ctx->obj) {
> > > +        rss_bpf__destroy(ctx->obj);
> > > +    } else {
> > > +        close(ctx->program_fd);
> > > +        close(ctx->map_configuration);
> > > +        close(ctx->map_toeplitz_key);
> > > +        close(ctx->map_indirections_table);
> > > +    }
> > > +
> > >      ctx->obj = NULL;
> > > +    ctx->program_fd = -1;
> > > +    ctx->map_configuration = -1;
> > > +    ctx->map_toeplitz_key = -1;
> > > +    ctx->map_indirections_table = -1;
> > >  }
> > > diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
> > > index bf3f2572c7c..ab08a7266d0 100644
> > > --- a/ebpf/ebpf_rss.h
> > > +++ b/ebpf/ebpf_rss.h
> > > @@ -20,6 +20,11 @@ struct EBPFRSSContext {
> > >      int map_configuration;
> > >      int map_toeplitz_key;
> > >      int map_indirections_table;
> > > +
> > > +    /* mapped eBPF maps for direct access to omit bpf_map_update_elem() 
> > > */
> > > +    void *mmap_configuration;
> > > +    void *mmap_toeplitz_key;
> > > +    void *mmap_indirections_table;
> > >  };
> > >
> > >  struct EBPFRSSConfig {
> > > --
> > > 2.41.0
> > >
> >
>


Reply via email to