Hi all,
On Mon, Oct 9, 2023 at 1:54 PM Peter Krempa <[email protected]> wrote:
>
> On Mon, Oct 09, 2023 at 09:16:14 +0300, Andrew Melnychenko wrote:
> > Added logic for loading the "RSS" eBPF program.
> > eBPF file descriptors passed to the QEMU.
> >
> > Signed-off-by: Andrew Melnychenko <[email protected]>
> > ---
> > src/qemu/qemu_command.c | 53 +++++++++++++++++++++++++++++++++++++++++
> > src/qemu/qemu_domain.c | 4 ++++
> > src/qemu/qemu_domain.h | 3 +++
> > 3 files changed, 60 insertions(+)
>
> This will require addition to qemuxml2argvtest once the qemu bit is
> ready.
>
>
> > diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> > index 8a7b80719f..ca6cb1bc7a 100644
> > --- a/src/qemu/qemu_command.c
> > +++ b/src/qemu/qemu_command.c
> > @@ -3786,6 +3786,23 @@ qemuBuildLegacyNicStr(virDomainNetDef *net)
> > NULLSTR_EMPTY(net->info.alias));
> > }
> >
> > +static char *qemuBuildEbpfRssArg(virDomainNetDef *net) {
> > + qemuDomainNetworkPrivate *netpriv = QEMU_DOMAIN_NETWORK_PRIVATE(net);
> > + g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
> > + size_t nfds;
> > + GSList *n;
> > +
> > + if (netpriv->ebpfrssfds) {
>
> This check is not needed.
>
> > + nfds = 0;
> > + for (n = netpriv->ebpfrssfds; n; n = n->next) {
> > + virBufferAsprintf(&buf, "%s:",
> > qemuFDPassDirectGetPath(n->data));
> > + nfds++;
>
> 'nfds' is not used.
>
>
> > + }
> > + }
> > + virBufferTrim(&buf, ":");
> > +
> > + return virBufferContentAndReset(&buf);
> > +}
> >
> > virJSONValue *
> > qemuBuildNicDevProps(virDomainDef *def,
> > @@ -3871,6 +3888,11 @@ qemuBuildNicDevProps(virDomainDef *def,
> > "T:failover", failover,
> > NULL) < 0)
> > return NULL;
> > + if (net->driver.virtio.rss == VIR_TRISTATE_SWITCH_ON &&
> > virQEMUCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_NET_EBPF_RSS_FDS)){
>
> This check should not be needed here ...
>
> > + g_autofree char *ebpf = qemuBuildEbpfRssArg(net);
>
> Because this will return a NULL string if it was not set up.
>
> > + if (virJSONValueObjectAdd(&props, "S:ebpf_rss_fds", ebpf,
> > NULL) < 0)
>
> This is definitely not needed in a separate step and can be done in the
> block above as "S:" skipps the attribute if the argument is NULL.
>
> > + return NULL;
> > + }
> > } else {
> > if (virJSONValueObjectAdd(&props,
> > "s:driver",
> > virDomainNetGetModelString(net),
> > @@ -4152,6 +4174,33 @@ qemuBuildWatchdogDevProps(const virDomainDef *def,
> > }
> >
> >
> > +static void qemuOpenEbpfRssFds(virDomainNetDef *net,
> > + virQEMUCaps *qemuCaps) {
> > + const void *ebpfRSSObject = NULL;
> > + size_t ebpfRSSObjectSize = 0;
> > + int fds[16];
>
> 16 feels arbitrary
Yes, it should be big enough. QEMU in the future may have less/more
program&maps file descriptors.
I'm thinking about a routine that could allocate the exact fd array
during ebpf load.
>
> > + int nfds = 0;
> > + qemuDomainNetworkPrivate *netpriv = QEMU_DOMAIN_NETWORK_PRIVATE(net);
> > +
> > + netpriv->libbpfRSSObject = NULL;
> > + netpriv->ebpfrssfds = NULL;
> > +
> > + /* Add ebpf values */
> > + if (net->driver.virtio.rss == VIR_TRISTATE_SWITCH_ON &&
> > virQEMUCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_NET_EBPF_RSS_FDS)) {
>
> This line is too long and there's a good spot to break it up;
>
> > + ebpfRSSObject = virQEMUCapsGetEbpf(qemuCaps, "rss",
> > &ebpfRSSObjectSize);
> > + nfds = qemuInterfaceLoadEbpf(ebpfRSSObject, ebpfRSSObjectSize,
> > &netpriv->libbpfRSSObject, fds, 16);
>
> Note that the '16' argument is unused by the callee.
>
> Also note that when running an unprivileged instance of libvirt, this
> function might not have permissions to do anything. It seems that the
> rest is okay, but we need to avoid any form of spurious errors and such
> if that is really okay and there are fallback paths.
>
> > +
> > + if (nfds > 0) {
> > + for (int i = 0; i < nfds; ++i) {
> > + g_autofree char *name = g_strdup_printf("ebpfrssfd-%s-%u",
> > net->info.alias, i);
> > + netpriv->ebpfrssfds = g_slist_prepend(netpriv->ebpfrssfds,
> > qemuFDPassDirectNew(name, fds + i));
> > + }
> > + netpriv->ebpfrssfds = g_slist_reverse(netpriv->ebpfrssfds);
> > + }
> > + }
> > +}
> > +
> > +
> > static int
> > qemuBuildWatchdogCommandLine(virCommand *cmd,
> > const virDomainDef *def,
> > @@ -8735,6 +8784,10 @@ qemuBuildInterfaceCommandLine(virQEMUDriver *driver,
> > qemuFDPassDirectTransferCommand(netpriv->slirpfd, cmd);
> > qemuFDPassTransferCommand(netpriv->vdpafd, cmd);
> >
> > + qemuOpenEbpfRssFds(net, qemuCaps);
>
> Note that this will cause problems when you'll be about to test this
> functionality in qemuxml2argvtest. The test code must not modify the
> host state, so this will either need to be mocked in the testsuite or
> you'll need to move it to the host-prepare function
> 'qemuProcessPrepareHost' which is skipped from the test suite. The test
> suite will then need to fake the data.
I think it will be mocked.
>
> > + for (n = netpriv->ebpfrssfds; n; n = n->next)
> > + qemuFDPassDirectTransferCommand(n->data, cmd);
> > +
> > if (!(hostnetprops = qemuBuildHostNetProps(vm, net)))
> > goto cleanup;
> >
> > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> > index eec7bed28b..c696482f29 100644
> > --- a/src/qemu/qemu_domain.c
> > +++ b/src/qemu/qemu_domain.c
> > @@ -38,6 +38,7 @@
> > #include "qemu_checkpoint.h"
> > #include "qemu_validate.h"
> > #include "qemu_namespace.h"
> > +#include "qemu_interface.h"
> > #include "viralloc.h"
> > #include "virlog.h"
> > #include "virerror.h"
> > @@ -1078,6 +1079,7 @@
> > qemuDomainNetworkPrivateClearFDs(qemuDomainNetworkPrivate *priv)
> > g_clear_pointer(&priv->vdpafd, qemuFDPassFree);
> > g_slist_free_full(g_steal_pointer(&priv->vhostfds), (GDestroyNotify)
> > qemuFDPassDirectFree);
> > g_slist_free_full(g_steal_pointer(&priv->tapfds), (GDestroyNotify)
> > qemuFDPassDirectFree);
> > + g_slist_free_full(g_steal_pointer(&priv->ebpfrssfds), (GDestroyNotify)
> > qemuFDPassDirectFree);
> > }
> >
> >
> > @@ -1089,6 +1091,8 @@ qemuDomainNetworkPrivateDispose(void *obj
> > G_GNUC_UNUSED)
> > qemuSlirpFree(priv->slirp);
> >
> > qemuDomainNetworkPrivateClearFDs(priv);
> > +
> > + qemuInterfaceCloseEbpf(priv->libbpfRSSObject);
>
> Does libvirt need to keep this open during the whole lifetime of the VM?
> Also note that this code will be called when restarting the
> libvirtd/virtqemud daemon, thus it can be called while the VM is alive.
>
I believe we can close the libbpf context right after QEMU
launch(after fds are passed to the QEMU process).
The idea was if the private context holds libbpf resources - it will
destroy them with the private network context itself.