Re: [PATCH v2] vdpa/mlx5: Add and remove debugfs in setup/teardown driver

2023-03-29 Thread Jason Wang
On Tue, Mar 28, 2023 at 3:18 PM Eli Cohen  wrote:
>
> The right place to add the debugfs create is in
> setup_driver() and remove it in teardown_driver().
>
> Current code adds the debugfs when creating the device but resetting a
> device will remove the debugfs subtree and subsequent set_driver will
> not be able to create the files since the debugfs pointer is NULL.
>
> Fixes: 294221004322 ("vdpa/mlx5: Add debugfs subtree")
> Signed-off-by: Eli Cohen 

Acked-by: Jason Wang 

Thanks

>
> ---
> v1 -> v2:
> Fix spelling errors
>
>  drivers/vdpa/mlx5/net/mlx5_vnet.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c 
> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index b73c5943aefd..f24aa7a7c735 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -2538,6 +2538,7 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev)
> err = 0;
> goto out;
> }
> +   mlx5_vdpa_add_debugfs(ndev);
> err = setup_virtqueues(mvdev);
> if (err) {
> mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
> @@ -2584,6 +2585,8 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev)
> if (!ndev->setup)
> return;
>
> +   mlx5_vdpa_remove_debugfs(ndev->debugfs);
> +   ndev->debugfs = NULL;
> teardown_steering(ndev);
> destroy_tir(ndev);
> destroy_rqt(ndev);
> @@ -3304,7 +3307,6 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev 
> *v_mdev, const char *name,
> if (err)
> goto err_reg;
>
> -   mlx5_vdpa_add_debugfs(ndev);
> mgtdev->ndev = ndev;
> return 0;
>
> --
> 2.38.1
>

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2] vdpa/mlx5: Avoid losing link state updates

2023-03-29 Thread Jason Wang
On Tue, Mar 28, 2023 at 3:18 PM Eli Cohen  wrote:
>
> Current code ignores link state updates if VIRTIO_NET_F_STATUS was not
> negotiated. However, link state updates could be received before feature
> negotiation was completed, therefore causing link state events to be
> lost, possibly leaving the link state down.
>
> Add code to detect if VIRTIO_NET_F_STATUS was set and update the link
> state. We add a spinlock to serialize updates to config.status to
> maintain its integrity.
>
> Fixes: 033779a708f0 ("vdpa/mlx5: make MTU/STATUS presence conditional on 
> feature bits")
> Signed-off-by: Eli Cohen 
>
> ---
> v1 -> v2:
> Remove helper function and integrate logic inline
>
>  drivers/vdpa/mlx5/net/mlx5_vnet.c | 82 ++-
>  drivers/vdpa/mlx5/net/mlx5_vnet.h |  2 +
>  2 files changed, 51 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c 
> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 0809ee8f6d38..85866ace0061 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -2322,10 +2322,53 @@ static void update_cvq_info(struct mlx5_vdpa_dev 
> *mvdev)
> }
>  }
>
> +static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
> +{
> +   u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
> +   u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
> +   int err;
> +
> +   MLX5_SET(query_vport_state_in, in, opcode, 
> MLX5_CMD_OP_QUERY_VPORT_STATE);
> +   MLX5_SET(query_vport_state_in, in, op_mod, opmod);
> +   MLX5_SET(query_vport_state_in, in, vport_number, vport);
> +   if (vport)
> +   MLX5_SET(query_vport_state_in, in, other_vport, 1);
> +
> +   err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
> +   if (err)
> +   return 0;
> +
> +   return MLX5_GET(query_vport_state_out, out, state);
> +}
> +
> +static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
> +{
> +   if (query_vport_state(mvdev->mdev, 
> MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
> +   VPORT_STATE_UP)
> +   return true;
> +
> +   return false;
> +}
> +
> +static void update_link_state(struct mlx5_vdpa_dev *mvdev)
> +{
> +   struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> +   bool up;
> +
> +   up = get_link_state(mvdev);
> +   spin_lock(>status_lock);
> +   if (up)
> +   ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, 
> VIRTIO_NET_S_LINK_UP);
> +   else
> +   ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, 
> ~VIRTIO_NET_S_LINK_UP);
> +   spin_unlock(>status_lock);
> +}
> +
>  static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 
> features)
>  {
> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> +   u64 old;
> int err;
>
> print_features(mvdev, features, true);
> @@ -2334,7 +2377,11 @@ static int mlx5_vdpa_set_driver_features(struct 
> vdpa_device *vdev, u64 features)
> if (err)
> return err;
>
> +   old = ndev->mvdev.actual_features;
> ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
> +   if (~old & ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS))
> +   update_link_state(mvdev);

Can we use workqueue to avoid introducing the spinlock here?

> +
> if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
> ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, 
> ndev->config.max_virtqueue_pairs);
> else
> @@ -3019,34 +3066,6 @@ struct mlx5_vdpa_mgmtdev {
> struct mlx5_vdpa_net *ndev;
>  };
>
> -static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
> -{
> -   u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
> -   u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
> -   int err;
> -
> -   MLX5_SET(query_vport_state_in, in, opcode, 
> MLX5_CMD_OP_QUERY_VPORT_STATE);
> -   MLX5_SET(query_vport_state_in, in, op_mod, opmod);
> -   MLX5_SET(query_vport_state_in, in, vport_number, vport);
> -   if (vport)
> -   MLX5_SET(query_vport_state_in, in, other_vport, 1);
> -
> -   err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
> -   if (err)
> -   return 0;
> -
> -   return MLX5_GET(query_vport_state_out, out, state);
> -}
> -
> -static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
> -{
> -   if (query_vport_state(mvdev->mdev, 
> MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
> -   VPORT_STATE_UP)
> -   return true;
> -
> -   return false;
> -}
> -
>  static void update_carrier(struct work_struct *work)
>  {
> struct mlx5_vdpa_wq_ent *wqent;
> @@ -3056,11 +3075,7 @@ static void update_carrier(struct work_struct *work)
> wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
> mvdev = wqent->mvdev;
> ndev = 

Re: [PATCH] vdpa_sim_net: complete the initialization before register the device

2023-03-29 Thread Jason Wang
On Thu, Mar 30, 2023 at 12:03 AM Stefano Garzarella  wrote:
>
> Initialization must be completed before calling _vdpa_register_device()
> since it can connect the device to the vDPA bus, so requests can arrive
> after that call.
>
> So for example vdpasim_net_work(), which uses the net->*_stats variables,
> can be scheduled before they are initialized.
>
> Let's move _vdpa_register_device() to the end of vdpasim_net_dev_add()
> and add a comment to avoid future issues.
>
> Fixes: 0899774cb360 ("vdpa_sim_net: vendor satistics")
> Signed-off-by: Stefano Garzarella 

Acked-by: Jason Wang 

This is needed for -stable.

Thanks

> ---
>
> Notes:
> I don't have a reproducer, but I became aware of this problem while
> I was changing the buffer allocation.
>
> In the block device, as soon as the device is registered, the driver
> sends several requests, and I guess this might happen for the net
> device as well.
>
> Thanks,
> Stefano
>
>  drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 13 +
>  1 file changed, 9 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c 
> b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
> index 862f405362de..dfe2ce341803 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
> @@ -466,16 +466,21 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev 
> *mdev, const char *name,
>
> vdpasim_net_setup_config(simdev, config);
>
> -   ret = _vdpa_register_device(>vdpa, VDPASIM_NET_VQ_NUM);
> -   if (ret)
> -   goto reg_err;
> -
> net = sim_to_net(simdev);
>
> u64_stats_init(>tx_stats.syncp);
> u64_stats_init(>rx_stats.syncp);
> u64_stats_init(>cq_stats.syncp);
>
> +   /*
> +* Initialization must be completed before this call, since it can
> +* connect the device to the vDPA bus, so requests can arrive after
> +* this call.
> +*/
> +   ret = _vdpa_register_device(>vdpa, VDPASIM_NET_VQ_NUM);
> +   if (ret)
> +   goto reg_err;
> +
> return 0;
>
>  reg_err:
> --
> 2.39.2
>

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [External] Re: 9p regression (Was: [PATCH v2] virtio_ring: don't update event idx on get_buf)

2023-03-29 Thread Jason Wang
On Wed, Mar 29, 2023 at 1:42 PM Michael S. Tsirkin  wrote:
>
> On Tue, Mar 28, 2023 at 11:39:59AM +0800, Jason Wang wrote:
> > On Tue, Mar 28, 2023 at 11:09 AM 黄杰  wrote:
> > >
> > > Jason Wang  于2023年3月28日周二 10:59写道:
> > > >
> > > > On Tue, Mar 28, 2023 at 10:13 AM Dominique Martinet
> > > >  wrote:
> > > > >
> > > > > Hi Michael, Albert,
> > > > >
> > > > > Albert Huang wrote on Sat, Mar 25, 2023 at 06:56:33PM +0800:
> > > > > > in virtio_net, if we disable the napi_tx, when we trigger a tx 
> > > > > > interrupt,
> > > > > > the vq->event_triggered will be set to true. It will no longer be 
> > > > > > set to
> > > > > > false. Unless we explicitly call virtqueue_enable_cb_delayed or
> > > > > > virtqueue_enable_cb_prepare.
> > > > >
> > > > > This patch (commited as 35395770f803 ("virtio_ring: don't update event
> > > > > idx on get_buf") in next-20230327 apparently breaks 9p, as reported by
> > > > > Luis in 
> > > > > https://lkml.kernel.org/r/zci+7wg5oclsl...@bombadil.infradead.org
> > > > >
> > > > > I've just hit had a look at recent patches[1] and reverted this to 
> > > > > test
> > > > > and I can mount again, so I'm pretty sure this is the culprit, but I
> > > > > didn't look at the content at all yet so cannot advise further.
> > > > > It might very well be that we need some extra handling for 9p
> > > > > specifically that can be added separately if required.
> > > > >
> > > > > [1] git log 0ec57cfa721fbd36b4c4c0d9ccc5d78a78f7fa35..HEAD 
> > > > > drivers/virtio/
> > > > >
> > > > >
> > > > > This can be reproduced with a simple mount, run qemu with some -virtfs
> > > > > argument and `mount -t 9p -o debug=65535 tag mountpoint` will hang 
> > > > > after
> > > > > these messages:
> > > > > 9pnet: -- p9_virtio_request (83): 9p debug: virtio request
> > > > > 9pnet: -- p9_virtio_request (83): virtio request kicked
> > > > >
> > > > > So I suspect we're just not getting a callback.
> > > >
> > > > I think so. The patch assumes the driver will call
> > > > virtqueue_disable/enable_cb() which is not the case of the 9p driver.
> > > >
> > > > So after the first interrupt, event_triggered will be set to true 
> > > > forever.
> > > >
> > > > Thanks
> > > >
> > >
> > > Hi: Wang
> > >
> > > Yes,  This patch assumes that all virtio-related drivers will call
> > > virtqueue_disable/enable_cb().
> > > Thank you for raising this issue.
> > >
> > > It seems that napi_tx is only related to virtue_net. I'm thinking if
> > > we need to refactor
> > > napi_tx instead of implementing it inside virtio_ring.
> >
> > We can hear from others.
> >
> > I think it's better not to workaround virtio_ring issues in a specific
> > driver. It might just add more hacks. We should correctly set
> > VRING_AVAIL_F_NO_INTERRUPT,
> >
> > Do you think the following might work (not even a compile test)?
>
>
> ok but:
>
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 41144b5246a8..12f4efb6dc54 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -852,16 +852,16 @@ static void virtqueue_disable_cb_split(struct
> > virtqueue *_vq)
> >  {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> >
> > -   if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
> > -   vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> > -   if (vq->event)
> > -   /* TODO: this is a hack. Figure out a cleaner
> > value to write. */
> > -   vring_used_event(>split.vring) = 0x0;
> > -   else
> > -   vq->split.vring.avail->flags =
> > -   cpu_to_virtio16(_vq->vdev,
> > -   
> > vq->split.avail_flags_shadow);
> > -   }
> > +   if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
> > +   vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> > +
> > +   if (vq->event && !vq->event_triggered)
> > +   /* TODO: this is a hack. Figure out a cleaner value to 
> > write. */
> > +   vring_used_event(>split.vring) = 0x0;
> > +   else
> > +   vq->split.vring.avail->flags =
> > +   cpu_to_virtio16(_vq->vdev,
> > +   vq->split.avail_flags_shadow);
> >  }
> >
> >  static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue 
> > *_vq)
> > @@ -1697,8 +1697,10 @@ static void virtqueue_disable_cb_packed(struct
> > virtqueue *_vq)
> >  {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> >
> > -   if (vq->packed.event_flags_shadow != 
> > VRING_PACKED_EVENT_FLAG_DISABLE) {
> > +   if (!(vq->packed.event_flags_shadow & 
> > VRING_PACKED_EVENT_FLAG_DISABLE))
> > vq->packed.event_flags_shadow = 
> > VRING_PACKED_EVENT_FLAG_DISABLE;
> > +
> > +   if (vq->event_triggered)
>
> I don't get this one. if event_triggered why do you 

Re: [PATCH v6] virtio: add VIRTIO_F_NOTIFICATION_DATA feature support

2023-03-29 Thread Xuan Zhuo
On Thu, 30 Mar 2023 01:49:23 +0300, Viktor Prutyanov  wrote:
> On Fri, Mar 24, 2023 at 10:50 PM Viktor Prutyanov  wrote:
> >
> > According to VirtIO spec v1.2, VIRTIO_F_NOTIFICATION_DATA feature
> > indicates that the driver passes extra data along with the queue
> > notifications.
> >
> > In a split queue case, the extra data is 16-bit available index. In a
> > packed queue case, the extra data is 1-bit wrap counter and 15-bit
> > available index.
> >
> > Add support for this feature for MMIO, channel I/O and modern PCI
> > transports.
> >
> > Signed-off-by: Viktor Prutyanov 
> > Acked-by: Jason Wang 
> > Reviewed-by: Xuan Zhuo 
> > ---
> >  v6: use VRING_PACKED_EVENT_F_WRAP_CTR
> >  v5: replace ternary operator with if-else
> >  v4: remove VP_NOTIFY macro and legacy PCI support, add
> > virtio_ccw_kvm_notify_with_data to virtio_ccw
> >  v3: support feature in virtio_ccw, remove VM_NOTIFY, use avail_idx_shadow,
> > remove byte swap, rename to vring_notification_data
> >  v2: reject the feature in virtio_ccw, replace __le32 with u32
> >
> >  Tested with disabled VIRTIO_F_NOTIFICATION_DATA on qemu-system-s390x
> >  (virtio-blk-ccw), qemu-system-riscv64 (virtio-blk-device,
> >  virtio-rng-device), qemu-system-x86_64 (virtio-blk-pci, virtio-net-pci)
> >  to make sure nothing is broken.
> >  Tested with enabled VIRTIO_F_NOTIFICATION_DATA on 64-bit RISC-V Linux
> >  and my hardware implementation of virtio-rng with MMIO.
> >
> >  drivers/s390/virtio/virtio_ccw.c   | 22 +++---
> >  drivers/virtio/virtio_mmio.c   | 18 +-
> >  drivers/virtio/virtio_pci_modern.c | 17 -
> >  drivers/virtio/virtio_ring.c   | 19 +++
> >  include/linux/virtio_ring.h|  2 ++
> >  include/uapi/linux/virtio_config.h |  6 ++
> >  6 files changed, 79 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/s390/virtio/virtio_ccw.c 
> > b/drivers/s390/virtio/virtio_ccw.c
> > index 954fc31b4bc7..02922768b129 100644
> > --- a/drivers/s390/virtio/virtio_ccw.c
> > +++ b/drivers/s390/virtio/virtio_ccw.c
> > @@ -391,7 +391,7 @@ static void virtio_ccw_drop_indicator(struct 
> > virtio_ccw_device *vcdev,
> > ccw_device_dma_free(vcdev->cdev, thinint_area, 
> > sizeof(*thinint_area));
> >  }
> >
> > -static bool virtio_ccw_kvm_notify(struct virtqueue *vq)
> > +static inline bool virtio_ccw_do_kvm_notify(struct virtqueue *vq, u32 data)
> >  {
> > struct virtio_ccw_vq_info *info = vq->priv;
> > struct virtio_ccw_device *vcdev;
> > @@ -402,12 +402,22 @@ static bool virtio_ccw_kvm_notify(struct virtqueue 
> > *vq)
> > BUILD_BUG_ON(sizeof(struct subchannel_id) != sizeof(unsigned int));
> > info->cookie = kvm_hypercall3(KVM_S390_VIRTIO_CCW_NOTIFY,
> >   *((unsigned int *)),
> > - vq->index, info->cookie);
> > + data, info->cookie);
> > if (info->cookie < 0)
> > return false;
> > return true;
> >  }
> >
> > +static bool virtio_ccw_kvm_notify(struct virtqueue *vq)
> > +{
> > +   return virtio_ccw_do_kvm_notify(vq, vq->index);
> > +}
> > +
> > +static bool virtio_ccw_kvm_notify_with_data(struct virtqueue *vq)
> > +{
> > +   return virtio_ccw_do_kvm_notify(vq, vring_notification_data(vq));
> > +}
> > +
> >  static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev,
> >struct ccw1 *ccw, int index)
> >  {
> > @@ -495,6 +505,7 @@ static struct virtqueue *virtio_ccw_setup_vq(struct 
> > virtio_device *vdev,
> >  struct ccw1 *ccw)
> >  {
> > struct virtio_ccw_device *vcdev = to_vc_device(vdev);
> > +   bool (*notify)(struct virtqueue *vq);
> > int err;
> > struct virtqueue *vq = NULL;
> > struct virtio_ccw_vq_info *info;
> > @@ -502,6 +513,11 @@ static struct virtqueue *virtio_ccw_setup_vq(struct 
> > virtio_device *vdev,
> > unsigned long flags;
> > bool may_reduce;
> >
> > +   if (__virtio_test_bit(vdev, VIRTIO_F_NOTIFICATION_DATA))
> > +   notify = virtio_ccw_kvm_notify_with_data;
> > +   else
> > +   notify = virtio_ccw_kvm_notify;
> > +
> > /* Allocate queue. */
> > info = kzalloc(sizeof(struct virtio_ccw_vq_info), GFP_KERNEL);
> > if (!info) {
> > @@ -524,7 +540,7 @@ static struct virtqueue *virtio_ccw_setup_vq(struct 
> > virtio_device *vdev,
> > may_reduce = vcdev->revision > 0;
> > vq = vring_create_virtqueue(i, info->num, KVM_VIRTIO_CCW_RING_ALIGN,
> > vdev, true, may_reduce, ctx,
> > -   virtio_ccw_kvm_notify, callback, name);
> > +   notify, callback, name);
> >
> > if (!vq) {
> > /* For now, we fail if we can't get the requested size. */
> > diff 

Re: [RFC PATCH 0/4] eBPF RSS through QMP support.

2023-03-29 Thread Andrew Melnichenko
Oh yeah, I'll fix that. Thank you!

On Wed, Mar 29, 2023 at 2:52 PM Xuan Zhuo  wrote:
>
> Is this a patch-set of QEMU? If yes, why are the email lists all kernel mail
> list without QEMU mail list?
>
> Thanks.
>
> On Wed, 29 Mar 2023 13:45:41 +0300, Andrew Melnychenko  
> wrote:
> > This series of patches provides the ability to retrieve eBPF program
> > through qmp, so management application may load bpf blob with proper 
> > capabilities.
> > Now, virtio-net devices can accept eBPF programs and maps through properties
> > as external file descriptors. Access to the eBPF map is direct through 
> > mmap()
> > call, so it should not require additional capabilities to bpf* calls.
> > eBPF file descriptors can be passed to QEMU from parent process or by unix
> > socket with sendfd() qmp command.
> >
> > Overall, the basic scenario of using the helper looks like this:
> >  * Libvirt checks for ebpf_fds property.
> >  * Libvirt requests eBPF blob through QMP.
> >  * Libvirt loads blob for virtio-net.
> >  * Libvirt launches the QEMU with eBPF fds passed.
> >
> > Andrew Melnychenko (4):
> >   ebpf: Added eBPF initialization by fds and map update.
> >   virtio-net: Added property to load eBPF RSS with fds.
> >   ebpf: Added declaration/initialization routines.
> >   qmp: Added new command to retrieve eBPF blob.
> >
> >  ebpf/ebpf.c|  48 +
> >  ebpf/ebpf.h|  25 +++
> >  ebpf/ebpf_rss-stub.c   |   6 ++
> >  ebpf/ebpf_rss.c| 124 +++--
> >  ebpf/ebpf_rss.h|  10 +++
> >  ebpf/meson.build   |   1 +
> >  hw/net/virtio-net.c|  77 ++--
> >  include/hw/virtio/virtio-net.h |   1 +
> >  monitor/qmp-cmds.c |  17 +
> >  qapi/misc.json |  25 +++
> >  10 files changed, 307 insertions(+), 27 deletions(-)
> >  create mode 100644 ebpf/ebpf.c
> >  create mode 100644 ebpf/ebpf.h
> >
> > --
> > 2.39.1
> >
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [RFC PATCH v1 1/2] vsock: return errors other than -ENOMEM to socket

2023-03-29 Thread Vishnu Dasa via Virtualization


> On Mar 28, 2023, at 4:20 AM, Arseniy Krasnov  wrote:
> 
> !! External Email
> 
> On 28.03.2023 14:19, Stefano Garzarella wrote:
>> On Tue, Mar 28, 2023 at 01:42:19PM +0300, Arseniy Krasnov wrote:
>>> 
>>> 
>>> On 28.03.2023 12:42, Stefano Garzarella wrote:
 I pressed send too early...
 
 CCing Bryan, Vishnu, and pv-driv...@vmware.com
 
 On Tue, Mar 28, 2023 at 11:39 AM Stefano Garzarella  
 wrote:
> 
> On Sun, Mar 26, 2023 at 01:13:11AM +0300, Arseniy Krasnov wrote:
>> This removes behaviour, where error code returned from any transport
>> was always switched to ENOMEM. This works in the same way as:
>> commit
>> c43170b7e157 ("vsock: return errors other than -ENOMEM to socket"),
>> but for receive calls.
>> 
>> Signed-off-by: Arseniy Krasnov 
>> ---
>> net/vmw_vsock/af_vsock.c | 4 ++--
>> 1 file changed, 2 insertions(+), 2 deletions(-)
>> 
>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>> index 19aea7cba26e..9262e0b77d47 100644
>> --- a/net/vmw_vsock/af_vsock.c
>> +++ b/net/vmw_vsock/af_vsock.c
>> @@ -2007,7 +2007,7 @@ static int __vsock_stream_recvmsg(struct sock *sk, 
>> struct msghdr *msg,
>> 
>>  read = transport->stream_dequeue(vsk, msg, len - copied, 
>> flags);
> 
> In vmci_transport_stream_dequeue() vmci_qpair_peekv() and
> vmci_qpair_dequev() return VMCI_ERROR_* in case of errors.
> 
> Maybe we should return -ENOMEM in vmci_transport_stream_dequeue() if
> those functions fail to keep the same behavior.
>>> 
>>> Yes, seems i missed it, because several months ago we had similar question 
>>> for send
>>> logic:
>>> https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fwww.spinics.net%2Flists%2Fkernel%2Fmsg4611091.html=05%7C01%7Cvdasa%40vmware.com%7C3b17793425384debe75708db2f7eec8c%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C0%7C0%7C638155994413494900%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C=MMfFcKuFFvMcJrbToKvWvIB%2FZmzp%2BdGGVWFVWztuSzg%3D=0
>>> And it was ok to not handle VMCI send path in this way. So i think current 
>>> implementation
>>> for tx is a little bit buggy, because VMCI specific error from 
>>> 'vmci_qpair_enquev()' is
>>> returned to af_vsock.c. I think error conversion must be added to VMCI 
>>> transport for tx
>>> also.
>> 
>> Good point!
>> 
>> These are negative values, so there are no big problems, but I don't
>> know what the user expects in this case.
>> 
>> @Vishnu Do we want to return an errno to the user or a VMCI_ERROR_*?
> 
> Small remark, as i can see, VMCI_ERROR_ is not exported to user in 
> include/uapi,
> so IIUC user won't be able to interpret such values correctly.
> 
> Thanks, Arseniy

Let's just return -ENOMEM from vmci transport in case of error in
vmci_transport_stream_enqueue and vmci_transport_stream_dequeue.

@Arseniy,
Could you please add a separate patch in this set to handle the above?

Thanks,
Vishnu

> 
>> 
>> In both cases I think we should do the same for both enqueue and
>> dequeue.
>> 
>>> 
>>> Good thing is that Hyper-V uses general error codes.
>> 
>> Yeah!
>> 
>> Thanks,
>> Stefano
>> 
>>> 
>>> Thanks, Arseniy
> 
> CCing Bryan, Vishnu, and pv-driv...@vmware.com
> 
> The other transports seem okay to me.
> 
> Thanks,
> Stefano
> 
>>  if (read < 0) {
>> -  err = -ENOMEM;
>> +  err = read;
>>  break;
>>  }
>> 
>> @@ -2058,7 +2058,7 @@ static int __vsock_seqpacket_recvmsg(struct sock 
>> *sk, struct msghdr *msg,
>>  msg_len = transport->seqpacket_dequeue(vsk, msg, flags);
>> 
>>  if (msg_len < 0) {
>> -  err = -ENOMEM;
>> +  err = msg_len;
>>  goto out;
>>  }
>> 
>> --
>> 2.25.1
>> 
 
>>> 
>> 
> 
> !! External Email: This email originated from outside of the organization. Do 
> not click links or open attachments unless you recognize the sender.


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v4] virtio_ring: interrupt disable flag updated to vq even with event_triggered is set

2023-03-29 Thread Michael S. Tsirkin
On Wed, Mar 29, 2023 at 06:23:00PM +0800, Albert Huang wrote:
> From: "huangjie.albert" 
> 
> in virtio_net, if we disable the napi_tx, when we trigger a tx interrupt,
> the vq->event_triggered will be set to true. It will no longer be set to
> false. Unless we explicitly call virtqueue_enable_cb_delayed or
> virtqueue_enable_cb_prepare.
> 
> If we disable the napi_tx, it will only be called when the tx ring
> buffer is relatively small.
> 
> Because event_triggered is true. Therefore, VRING_AVAIL_F_NO_INTERRUPT or
> VRING_PACKED_EVENT_FLAG_DISABLE will not be set. So we update
> vring_used_event(>split.vring) or vq->packed.vring.driver->off_wrap
> every time we call virtqueue_get_buf_ctx. This brings more interrupts.
> 
> To summarize:
> 1) event_triggered was set to true in vring_interrupt()
> 2) after this nothing will happen for virtqueue_disable_cb() so
>VRING_AVAIL_F_NO_INTERRUPT is not set in avail_flags_shadow
> 3) virtqueue_get_buf_ctx_split() will still think the cb is enabled
>then it tries to publish new event
> 
> To fix:
> update VRING_AVAIL_F_NO_INTERRUPT or VRING_PACKED_EVENT_FLAG_DISABLE to vq
> when we call virtqueue_disable_cb even the event_triggered is set to true.
> 
> Tested with iperf:
> iperf3 tcp stream:
> vm1 -> vm2
> vm2 just receives tcp data stream from vm1, and sends the ack to vm1,
> there are many tx interrupts in vm2.
> but without event_triggered there are just a few tx interrupts.
> 
> v2->v3:
> -update the interrupt disable flag even with the event_triggered is set,
> -instead of checking whether event_triggered is set in
> -virtqueue_get_buf_ctx_{packed/split}, will cause the drivers  which have
> -not called virtqueue_{enable/disable}_cb to miss notifications.
> 
> v3->v4:
> -remove change for
> -"if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE)"
> -in virtqueue_disable_cb_packed
> 
> Fixes: 8d622d21d248 ("virtio: fix up virtio_disable_cb")
> Signed-off-by: huangjie.albert 
> Signed-off-by: Michael S. Tsirkin 
> Signed-off-by: Jason Wang 

Can you confirm you tested 9p and it still works fine?

> ---
>  drivers/virtio/virtio_ring.c | 22 --
>  1 file changed, 16 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index cbeeea1b0439..ec7ab8e04846 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -931,6 +931,14 @@ static void virtqueue_disable_cb_split(struct virtqueue 
> *_vq)
>  
>   if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
>   vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> +
> + /*
> +  * If device triggered an event already it won't trigger one 
> again:
> +  * no need to disable.
> +  */
> + if (vq->event_triggered)
> + return;
> +
>   if (vq->event)
>   /* TODO: this is a hack. Figure out a cleaner value to 
> write. */
>   vring_used_event(>split.vring) = 0x0;
> @@ -1761,6 +1769,14 @@ static void virtqueue_disable_cb_packed(struct 
> virtqueue *_vq)
>  
>   if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
>   vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
> +
> + /*
> +  * If device triggered an event already it won't trigger one 
> again:
> +  * no need to disable.
> +  */
> + if (vq->event_triggered)
> + return;
> +
>   vq->packed.vring.driver->flags =
>   cpu_to_le16(vq->packed.event_flags_shadow);
>   }
> @@ -2462,12 +2478,6 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
>  {
>   struct vring_virtqueue *vq = to_vvq(_vq);
>  
> - /* If device triggered an event already it won't trigger one again:
> -  * no need to disable.
> -  */
> - if (vq->event_triggered)
> - return;
> -
>   if (vq->packed_ring)
>   virtqueue_disable_cb_packed(_vq);
>   else
> -- 
> 2.20.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v4] virtio_ring: interrupt disable flag updated to vq even with event_triggered is set

2023-03-29 Thread Michael S. Tsirkin
On Wed, Mar 29, 2023 at 06:23:00PM +0800, Albert Huang wrote:
> From: "huangjie.albert" 
> 
> in virtio_net, if we disable the napi_tx, when we trigger a tx interrupt,
> the vq->event_triggered will be set to true. It will no longer be set to
> false. Unless we explicitly call virtqueue_enable_cb_delayed or
> virtqueue_enable_cb_prepare.
> 
> If we disable the napi_tx, it will only be called when the tx ring
> buffer is relatively small.
> 
> Because event_triggered is true. Therefore, VRING_AVAIL_F_NO_INTERRUPT or
> VRING_PACKED_EVENT_FLAG_DISABLE will not be set. So we update
> vring_used_event(>split.vring) or vq->packed.vring.driver->off_wrap
> every time we call virtqueue_get_buf_ctx. This brings more interrupts.
> 
> To summarize:
> 1) event_triggered was set to true in vring_interrupt()
> 2) after this nothing will happen for virtqueue_disable_cb() so
>VRING_AVAIL_F_NO_INTERRUPT is not set in avail_flags_shadow
> 3) virtqueue_get_buf_ctx_split() will still think the cb is enabled
>then it tries to publish new event
> 
> To fix:
> update VRING_AVAIL_F_NO_INTERRUPT or VRING_PACKED_EVENT_FLAG_DISABLE to vq
> when we call virtqueue_disable_cb even the event_triggered is set to true.
> 
> Tested with iperf:
> iperf3 tcp stream:
> vm1 -> vm2
> vm2 just receives tcp data stream from vm1, and sends the ack to vm1,
> there are many tx interrupts in vm2.
> but without event_triggered there are just a few tx interrupts.
> 
> v2->v3:
> -update the interrupt disable flag even with the event_triggered is set,
> -instead of checking whether event_triggered is set in
> -virtqueue_get_buf_ctx_{packed/split}, will cause the drivers  which have
> -not called virtqueue_{enable/disable}_cb to miss notifications.
> 
> v3->v4:
> -remove change for
> -"if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE)"
> -in virtqueue_disable_cb_packed
> 
> Fixes: 8d622d21d248 ("virtio: fix up virtio_disable_cb")
> Signed-off-by: huangjie.albert 
> Signed-off-by: Michael S. Tsirkin 
> Signed-off-by: Jason Wang 

Patch is ok so I rewrote the commit log and applied.
Please do not add signed-off-by tags by others -
read up what this tag means.
I also rewrote your Signed-off-by to have your name
formatted same as in email - hope that is ok.


> ---
>  drivers/virtio/virtio_ring.c | 22 --
>  1 file changed, 16 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index cbeeea1b0439..ec7ab8e04846 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -931,6 +931,14 @@ static void virtqueue_disable_cb_split(struct virtqueue 
> *_vq)
>  
>   if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
>   vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> +
> + /*
> +  * If device triggered an event already it won't trigger one 
> again:
> +  * no need to disable.
> +  */
> + if (vq->event_triggered)
> + return;
> +
>   if (vq->event)
>   /* TODO: this is a hack. Figure out a cleaner value to 
> write. */
>   vring_used_event(>split.vring) = 0x0;
> @@ -1761,6 +1769,14 @@ static void virtqueue_disable_cb_packed(struct 
> virtqueue *_vq)
>  
>   if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
>   vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
> +
> + /*
> +  * If device triggered an event already it won't trigger one 
> again:
> +  * no need to disable.
> +  */
> + if (vq->event_triggered)
> + return;
> +
>   vq->packed.vring.driver->flags =
>   cpu_to_le16(vq->packed.event_flags_shadow);
>   }
> @@ -2462,12 +2478,6 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
>  {
>   struct vring_virtqueue *vq = to_vvq(_vq);
>  
> - /* If device triggered an event already it won't trigger one again:
> -  * no need to disable.
> -  */
> - if (vq->event_triggered)
> - return;
> -
>   if (vq->packed_ring)
>   virtqueue_disable_cb_packed(_vq);
>   else
> -- 
> 2.20.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v4] virtio_ring: interrupt disable flag updated to vq even with event_triggered is set

2023-03-29 Thread Michael S. Tsirkin
On Wed, Mar 29, 2023 at 07:45:01PM +0800, Xuan Zhuo wrote:
> On Wed, 29 Mar 2023 18:23:00 +0800, Albert Huang 
>  wrote:
> > From: "huangjie.albert" 
> >
> > in virtio_net, if we disable the napi_tx, when we trigger a tx interrupt,
> > the vq->event_triggered will be set to true. It will no longer be set to
> > false. Unless we explicitly call virtqueue_enable_cb_delayed or
> > virtqueue_enable_cb_prepare.
> >
> > If we disable the napi_tx, it will only be called when the tx ring
> > buffer is relatively small.
> >
> > Because event_triggered is true. Therefore, VRING_AVAIL_F_NO_INTERRUPT or
> > VRING_PACKED_EVENT_FLAG_DISABLE will not be set. So we update
> > vring_used_event(>split.vring) or vq->packed.vring.driver->off_wrap
> > every time we call virtqueue_get_buf_ctx.This bring more interruptions.
> >
> > To summarize:
> > 1) event_triggered was set to true in vring_interrupt()
> > 2) after this nothing will happen for virtqueue_disable_cb() so
> >VRING_AVAIL_F_NO_INTERRUPT is not set in avail_flags_shadow
> > 3) virtqueue_get_buf_ctx_split() will still think the cb is enabled
> >then it tries to publish new event
> >
> > To fix:
> > update VRING_AVAIL_F_NO_INTERRUPT or VRING_PACKED_EVENT_FLAG_DISABLE to vq
> > when we call virtqueue_disable_cb even the event_triggered is set to true.
> >
> > Tested with iperf:
> > iperf3 tcp stream:
> > vm1 -> vm2
> > vm2 just receives tcp data stream from vm1, and sends the ack to vm1,
> > there are many tx interrupts in vm2.
> > but without event_triggered there are just a few tx interrupts.
> >
> > v2->v3:
> > -update the interrupt disable flag even with the event_triggered is set,
> > -instead of checking whether event_triggered is set in
> > -virtqueue_get_buf_ctx_{packed/split}, will cause the drivers  which have
> > -not called virtqueue_{enable/disable}_cb to miss notifications.
> >
> > v3->v4:
> > -remove change for
> > -"if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE)"
> > -in virtqueue_disable_cb_packed
> >
> > Fixes: 8d622d21d248 ("virtio: fix up virtio_disable_cb")
> 
> Hi Michael,
> 
> I want to know what the purpose of this patch ("virtio: fix up 
> virtio_disable_cb")
> is. I re-reviewed this patch but didn't understand the purpose of these
> patches. Does it reduce one write to vring_used_event(&vq->split.vring)?
> 
> Thanks.

Are you asking why I applied 8d622d21d248 ("virtio: fix up
virtio_disable_cb")?

It was a prerequisite to fixing interrupt storms we saw in the field previously.

See Message-ID: <20210413011508-mutt-send-email-...@kernel.org>
for the bug report and Message-ID: <20210413054733.36363-1-...@redhat.com>
for the original patchset fixing it.


> 
> > Signed-off-by: huangjie.albert 
> > Signed-off-by: Michael S. Tsirkin 
> > Signed-off-by: Jason Wang 
> > ---
> >  drivers/virtio/virtio_ring.c | 22 --
> >  1 file changed, 16 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index cbeeea1b0439..ec7ab8e04846 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -931,6 +931,14 @@ static void virtqueue_disable_cb_split(struct 
> > virtqueue *_vq)
> >
> > if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
> > vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> > +
> > +   /*
> > +* If device triggered an event already it won't trigger one 
> > again:
> > +* no need to disable.
> > +*/
> > +   if (vq->event_triggered)
> > +   return;
> > +
> > if (vq->event)
> > /* TODO: this is a hack. Figure out a cleaner value to 
> > write. */
> > vring_used_event(>split.vring) = 0x0;
> > @@ -1761,6 +1769,14 @@ static void virtqueue_disable_cb_packed(struct 
> > virtqueue *_vq)
> >
> > if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
> > vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
> > +
> > +   /*
> > +* If device triggered an event already it won't trigger one 
> > again:
> > +* no need to disable.
> > +*/
> > +   if (vq->event_triggered)
> > +   return;
> > +
> > vq->packed.vring.driver->flags =
> > cpu_to_le16(vq->packed.event_flags_shadow);
> > }
> > @@ -2462,12 +2478,6 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
> >  {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> >
> > -   /* If device triggered an event already it won't trigger one again:
> > -* no need to disable.
> > -*/
> > -   if (vq->event_triggered)
> > -   return;
> > -
> > if (vq->packed_ring)
> > virtqueue_disable_cb_packed(_vq);
> > else
> > --
> > 2.20.1
> >

___
Virtualization mailing list

Re: [PATCH v3] virtio_ring: interrupt disable flag updated to vq even with event_triggered is set

2023-03-29 Thread Michael S. Tsirkin
On Wed, Mar 29, 2023 at 03:27:03PM +0800, Xuan Zhuo wrote:
> Maybe one new thread is better.
> 
> Thanks.

I don't know, but please do not post the same message twice
without explanation. If you repost, put "PATCH repost" in
the subject.

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v3] virtio_ring: interrupt disable flag updated to vq even with event_triggered is set

2023-03-29 Thread Michael S. Tsirkin
I can't parse the subject at all. I think the subject from v2
was fine - we are skipping event index updates on get buf.
Or maybe go higher level and describe the effect of the patch:

virtio_ring: reduce interrupt rate with event idx enabled


On Wed, Mar 29, 2023 at 03:21:35PM +0800, Albert Huang wrote:
> From: "huangjie.albert" 
> 
> in virtio_net, if we disable the napi_tx, when we trigger a tx interrupt,
> the vq->event_triggered will be set to true. It will no longer be set to
> false.

this last sentence is redundant

> Unless we explicitly call virtqueue_enable_cb_delayed or
> virtqueue_enable_cb_prepare.
> 
> If we disable the napi_tx, it will only be called when the tx ring
> buffer is relatively small.
> 
> Because event_triggered is true. Therefore, VRING_AVAIL_F_NO_INTERRUPT or
> VRING_PACKED_EVENT_FLAG_DISABLE will not be set. So we update
> vring_used_event(>split.vring) or vq->packed.vring.driver->off_wrap
> every time we call virtqueue_get_buf_ctx.This bring more interruptions.

This will bring more interrupts. And pls add space after ".".

> 
> To summarize:
> 1) event_triggered was set to true in vring_interrupt()
> 2) after this nothing will happen for virtqueue_disable_cb() so
>VRING_AVAIL_F_NO_INTERRUPT is not set in avail_flags_shadow
> 3) virtqueue_get_buf_ctx_split() will still think the cb is enabled
>then it tries to publish new event
> 
> To fix:
> update VRING_AVAIL_F_NO_INTERRUPT or VRING_PACKED_EVENT_FLAG_DISABLE to vq
> when we call virtqueue_disable_cb even the event_triggered is set to true.

bad grammar here and way too much detail.  better:

make disable_cb set VRING_AVAIL_F_NO_INTERRUPT or
VRING_PACKED_EVENT_FLAG_DISABLE in flags shadow to make get_buf
correctly detect that callbacks are disabled.

> 
> Tested with iperf:
> iperf3 tcp stream:
> vm1 -> vm2
> vm2 just receives tcp data stream from vm1, and sends the ack to vm1,
> there are many tx interrupts in vm2.
> but without event_triggered there are just a few tx interrupts.
>

put changelog after --- please.
 
> v2->v3:
> -update the interrupt disable flag even with the event_triggered is set,
> -instead of checking whether event_triggered is set in
> -virtqueue_get_buf_ctx_{packed/split}, will cause the drivers  which have
> -not called virtqueue_{enable/disable}_cb to miss notifications.
> 
> Fixes: 8d622d21d248 ("virtio: fix up virtio_disable_cb")
> Signed-off-by: huangjie.albert 
> Signed-off-by: Michael S. Tsirkin 
> Signed-off-by: Jason Wang 
> ---
>  drivers/virtio/virtio_ring.c | 24 +---
>  1 file changed, 17 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 307e139cb11d..ad74463a48ee 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -812,6 +812,14 @@ static void virtqueue_disable_cb_split(struct virtqueue 
> *_vq)
>  
>   if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
>   vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> +
> + /*
> +  * If device triggered an event already it won't trigger one 
> again:
> +  * no need to disable.
> +  */
> + if (vq->event_triggered)
> + return;
> +
>   if (vq->event)
>   /* TODO: this is a hack. Figure out a cleaner value to 
> write. */
>   vring_used_event(>split.vring) = 0x0;
> @@ -1544,8 +1552,16 @@ static void virtqueue_disable_cb_packed(struct 
> virtqueue *_vq)
>  {
>   struct vring_virtqueue *vq = to_vvq(_vq);
>  
> - if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
> + if (!(vq->packed.event_flags_shadow & VRING_PACKED_EVENT_FLAG_DISABLE)) 
> {


Why are you making this change? It's not great because it
only works because VRING_PACKED_EVENT_FLAG_DISABLE happens
to equal 0x1. does not the patch work with the original
if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE)
?

Besides, if you are making unrelated changes commit log should
describe them.

>   vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
> +
> + /*
> +  * If device triggered an event already it won't trigger one 
> again:
> +  * no need to disable.
> +  */
> + if (vq->event_triggered)
> + return;
> +
>   vq->packed.vring.driver->flags =
>   cpu_to_le16(vq->packed.event_flags_shadow);
>   }
> @@ -2063,12 +2079,6 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
>  {
>   struct vring_virtqueue *vq = to_vvq(_vq);
>  
> - /* If device triggered an event already it won't trigger one again:
> -  * no need to disable.
> -  */
> - if (vq->event_triggered)
> - return;
> -
>   if (vq->packed_ring)
>   

[PATCH] vdpa_sim_net: complete the initialization before registering the device

2023-03-29 Thread Stefano Garzarella
Initialization must be completed before calling _vdpa_register_device()
since it can connect the device to the vDPA bus, so requests can arrive
after that call.

So for example vdpasim_net_work(), which uses the net->*_stats variables,
can be scheduled before they are initialized.

Let's move _vdpa_register_device() to the end of vdpasim_net_dev_add()
and add a comment to avoid future issues.

Fixes: 0899774cb360 ("vdpa_sim_net: vendor satistics")
Signed-off-by: Stefano Garzarella 
---

Notes:
I don't have a reproducer, but I became aware of this problem while
I was changing the buffer allocation.

In the block device, as soon as the device is registered, the driver
sends several requests, and I guess this might happen for the net
device as well.

Thanks,
Stefano

 drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c 
b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
index 862f405362de..dfe2ce341803 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
@@ -466,16 +466,21 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev 
*mdev, const char *name,
 
vdpasim_net_setup_config(simdev, config);
 
-   ret = _vdpa_register_device(>vdpa, VDPASIM_NET_VQ_NUM);
-   if (ret)
-   goto reg_err;
-
net = sim_to_net(simdev);
 
u64_stats_init(>tx_stats.syncp);
u64_stats_init(>rx_stats.syncp);
u64_stats_init(>cq_stats.syncp);
 
+   /*
+* Initialization must be completed before this call, since it can
+* connect the device to the vDPA bus, so requests can arrive after
+* this call.
+*/
+   ret = _vdpa_register_device(>vdpa, VDPASIM_NET_VQ_NUM);
+   if (ret)
+   goto reg_err;
+
return 0;
 
 reg_err:
-- 
2.39.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[linux-next:master] BUILD REGRESSION 198925fae644b0099b66fac1d972721e6e563b17

2023-03-29 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: 198925fae644b0099b66fac1d972721e6e563b17  Add linux-next specific 
files for 20230329

Error/Warning reports:

https://lore.kernel.org/oe-kbuild-all/202303082135.njdx1bij-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202303161521.jbgbafjj-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202303291916.ovlfjk2i-...@intel.com

Error/Warning: (recently discovered and may have been fixed)

drivers/base/power/domain.c:3090:23: error: 'struct dev_pm_info' has no member 
named 'runtime_error'
drivers/base/power/domain.c:3092:28: error: 'struct dev_pm_info' has no member 
named 'disable_depth'
drivers/base/power/domain.c:3094:28: error: 'struct dev_pm_info' has no member 
named 'runtime_status'
drivers/base/power/domain.c:654:20: error: 'pm_wq' undeclared (first use in 
this function)
drivers/base/power/domain.c:853:39: error: 'struct dev_pm_info' has no member 
named 'ignore_children'
drivers/base/power/domain_governor.c:85:24: error: 'struct dev_pm_info' has no 
member named 'ignore_children'
drivers/clk/clk-sp7021.c:316:8: warning: result of comparison of constant 
18446744073709551615 with expression of type 'typeof (_Generic((_m), char: 
(unsigned char)0, unsigned char: (unsigned char)0, signed char: (unsigned 
char)0, unsigned short: (unsigned short)0, short: (unsigned short)0, unsigned 
int: (unsigned int)0, int: (unsigned int)0, unsigned long: (unsigned long)0, 
long: (unsigned long)0, unsigned long long: (unsigned long long)0, long long: 
(unsigned long long)0, default: (_m)))' (aka 'unsigned int') is always false 
[-Wtautological-constant-out-of-range-compare]
drivers/gpu/drm/amd/amdgpu/../display/dc/link/link_validation.c:351:13: 
warning: variable 'bw_needed' set but not used [-Wunused-but-set-variable]
drivers/gpu/drm/amd/amdgpu/../display/dc/link/link_validation.c:352:25: 
warning: variable 'link' set but not used [-Wunused-but-set-variable]
drivers/gpu/drm/amd/amdgpu/../pm/swsmu/smu13/smu_v13_0_6_ppt.c:309:17: sparse:  
  int
drivers/gpu/drm/amd/amdgpu/../pm/swsmu/smu13/smu_v13_0_6_ppt.c:309:17: sparse:  
  void
drivers/net/wireless/legacy/ray_cs.c:628:17: warning: 'strncpy' specified bound 
32 equals destination size [-Wstringop-truncation]

Unverified Error/Warning (likely false positive, please contact us if 
interested):

drivers/pinctrl/pinctrl-mlxbf3.c:162:20: sparse: sparse: symbol 
'mlxbf3_pmx_funcs' was not declared. Should it be static?
drivers/soc/fsl/qe/tsa.c:140:26: sparse: sparse: incorrect type in argument 2 
(different address spaces)
drivers/soc/fsl/qe/tsa.c:140:9: sparse: sparse: incorrect type in argument 2 
(different address spaces)
drivers/soc/fsl/qe/tsa.c:150:16: sparse: sparse: incorrect type in argument 1 
(different address spaces)
drivers/soc/fsl/qe/tsa.c:150:27: sparse: sparse: incorrect type in argument 1 
(different address spaces)
drivers/soc/fsl/qe/tsa.c:189:26: sparse: sparse: dereference of noderef 
expression
drivers/soc/fsl/qe/tsa.c:663:22: sparse: sparse: incorrect type in assignment 
(different address spaces)
drivers/soc/fsl/qe/tsa.c:673:21: sparse: sparse: incorrect type in assignment 
(different address spaces)
drivers/virtio/virtio_ring.c:2784:3: warning: Value stored to 'err' is never 
read [clang-analyzer-deadcode.DeadStores]
drivers/watchdog/imx2_wdt.c:442:22: sparse: sparse: symbol 'imx_wdt' was not 
declared. Should it be static?
drivers/watchdog/imx2_wdt.c:446:22: sparse: sparse: symbol 'imx_wdt_legacy' was 
not declared. Should it be static?
include/linux/gpio/consumer.h: linux/err.h is included more than once.
include/linux/gpio/driver.h: asm/bug.h is included more than once.
io_uring/io_uring.c:432 io_prep_async_work() error: we previously assumed 
'req->file' could be null (see line 425)
io_uring/kbuf.c:221 __io_remove_buffers() warn: variable dereferenced before 
check 'bl->buf_ring' (see line 219)
net/mac80211/mesh_pathtbl.c:616:24: warning: Value stored to 'cache' during its 
initialization is never read [clang-analyzer-deadcode.DeadStores]

Error/Warning ids grouped by kconfigs:

gcc_recent_errors
|-- alpha-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-bw_needed-set-but-not-used
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-link-set-but-not-used
|   `-- 
drivers-net-wireless-legacy-ray_cs.c:warning:strncpy-specified-bound-equals-destination-size
|-- alpha-randconfig-r021-20230326
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-bw_needed-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-link-set-but-not-used
|-- arc-allyesconfig
|   |-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-bw_needed-set-but-not-used
|   `-- 
drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_validation.c:warning:variable-li

Re: [RFC PATCH v2 48/48] sock: Remove ->sendpage*() in favour of sendmsg(MSG_SPLICE_PAGES)

2023-03-29 Thread David Howells
Hannes Reinecke  wrote:

> > [!] Note: This is a work in progress.  At the moment, some things won't
> >  build if this patch is applied.  nvme, kcm, smc, tls.

Actually, that needs updating.  nvme and smc now build.

> Weelll ... what happens to consumers of kernel_sendpage()?
> (Let's call them nvme ...)
> Should they be moved over, too?

Patch 42 should address NVMe, I think.  I can't test it, though, as I don't
have hardware.

There should be no callers of kernel_sendmsg() by the end of this patchset,
and the only remaining implementors of sendpage are Chelsio-TLS, AF_TLS and
AF_KCM, which as stated in the cover, aren't yet converted and won't build.

> Or what is the general consensus here?
> 
> (And what do we do with TLS? It does have a ->sendpage() version, too ...)

I know.  There are three things left that I need to tackle, but I'd like to
get opinions on some of the other bits and I might need some help with AF_TLS
and AF_KCM.

That said, should I just remove tls_sw_do_sendpage() since presumably the data
is going to get copied(?) and encrypted and the source pages aren't going to
be held onto?

David

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [RFC PATCH v2 48/48] sock: Remove ->sendpage*() in favour of sendmsg(MSG_SPLICE_PAGES)

2023-03-29 Thread Hannes Reinecke

On 3/29/23 16:13, David Howells wrote:

[!] Note: This is a work in progress.  At the moment, some things won't
 build if this patch is applied.  nvme, kcm, smc, tls.

Remove ->sendpage() and ->sendpage_locked().  sendmsg() with
MSG_SPLICE_PAGES should be used instead.  This allows multiple pages and
multipage folios to be passed through.

Signed-off-by: David Howells 
Acked-by: Marc Kleine-Budde  # for net/can
cc: "David S. Miller" 
cc: Eric Dumazet 
cc: Jakub Kicinski 
cc: Paolo Abeni 
cc: Jens Axboe 
cc: Matthew Wilcox 
cc: b...@vger.kernel.org
cc: d...@vger.kernel.org
cc: linux-...@lists.infradead.org
cc: linux-arm-...@vger.kernel.org
cc: linux-...@vger.kernel.org
cc: linux-cry...@vger.kernel.org
cc: linux-...@vger.kernel.org
cc: linux-h...@vger.kernel.org
cc: linux-ker...@vger.kernel.org
cc: linux-r...@vger.kernel.org
cc: linux-s...@vger.kernel.org
cc: linux-w...@vger.kernel.org
cc: linux-...@vger.kernel.org
cc: mp...@lists.linux.dev
cc: net...@vger.kernel.org
cc: rds-de...@oss.oracle.com
cc: tipc-discuss...@lists.sourceforge.net
cc: virtualization@lists.linux-foundation.org
---
  Documentation/networking/scaling.rst |   4 +-
  crypto/af_alg.c  |  29 --
  crypto/algif_aead.c  |  22 +
  crypto/algif_rng.c   |   2 -
  crypto/algif_skcipher.c  |  14 ---
  include/linux/net.h  |   8 --
  include/net/inet_common.h|   2 -
  include/net/sock.h   |   6 --
  net/appletalk/ddp.c  |   1 -
  net/atm/pvc.c|   1 -
  net/atm/svc.c|   1 -
  net/ax25/af_ax25.c   |   1 -
  net/caif/caif_socket.c   |   2 -
  net/can/bcm.c|   1 -
  net/can/isotp.c  |   1 -
  net/can/j1939/socket.c   |   1 -
  net/can/raw.c|   1 -
  net/core/sock.c  |  35 +--
  net/dccp/ipv4.c  |   1 -
  net/dccp/ipv6.c  |   1 -
  net/ieee802154/socket.c  |   2 -
  net/ipv4/af_inet.c   |  21 
  net/ipv4/tcp.c   |  34 ---
  net/ipv4/tcp_bpf.c   |  21 +---
  net/ipv4/tcp_ipv4.c  |   1 -
  net/ipv4/udp.c   |  22 -
  net/ipv4/udp_impl.h  |   2 -
  net/ipv4/udplite.c   |   1 -
  net/ipv6/af_inet6.c  |   3 -
  net/ipv6/raw.c   |   1 -
  net/ipv6/tcp_ipv6.c  |   1 -
  net/key/af_key.c |   1 -
  net/l2tp/l2tp_ip.c   |   1 -
  net/l2tp/l2tp_ip6.c  |   1 -
  net/llc/af_llc.c |   1 -
  net/mctp/af_mctp.c   |   1 -
  net/mptcp/protocol.c |   2 -
  net/netlink/af_netlink.c |   1 -
  net/netrom/af_netrom.c   |   1 -
  net/packet/af_packet.c   |   2 -
  net/phonet/socket.c  |   2 -
  net/qrtr/af_qrtr.c   |   1 -
  net/rds/af_rds.c |   1 -
  net/rose/af_rose.c   |   1 -
  net/rxrpc/af_rxrpc.c |   1 -
  net/sctp/protocol.c  |   1 -
  net/socket.c |  48 -
  net/tipc/socket.c|   3 -
  net/unix/af_unix.c   | 139 ---
  net/vmw_vsock/af_vsock.c |   3 -
  net/x25/af_x25.c |   1 -
  net/xdp/xsk.c|   1 -
  52 files changed, 9 insertions(+), 447 deletions(-)


Weelll ... what happens to consumers of kernel_sendpage()?
(Let's call them nvme ...)
Should they be moved over, too?

Or what is the general consensus here?

(And what do we do with TLS? It does have a ->sendpage() version, too ...)

Cheers,

Hannes

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[RFC PATCH v2 48/48] sock: Remove ->sendpage*() in favour of sendmsg(MSG_SPLICE_PAGES)

2023-03-29 Thread David Howells
[!] Note: This is a work in progress.  At the moment, some things won't
build if this patch is applied.  nvme, kcm, smc, tls.

Remove ->sendpage() and ->sendpage_locked().  sendmsg() with
MSG_SPLICE_PAGES should be used instead.  This allows multiple pages and
multipage folios to be passed through.

Signed-off-by: David Howells 
Acked-by: Marc Kleine-Budde  # for net/can
cc: "David S. Miller" 
cc: Eric Dumazet 
cc: Jakub Kicinski 
cc: Paolo Abeni 
cc: Jens Axboe 
cc: Matthew Wilcox 
cc: b...@vger.kernel.org
cc: d...@vger.kernel.org
cc: linux-...@lists.infradead.org
cc: linux-arm-...@vger.kernel.org
cc: linux-...@vger.kernel.org
cc: linux-cry...@vger.kernel.org
cc: linux-...@vger.kernel.org
cc: linux-h...@vger.kernel.org
cc: linux-ker...@vger.kernel.org
cc: linux-r...@vger.kernel.org
cc: linux-s...@vger.kernel.org
cc: linux-w...@vger.kernel.org
cc: linux-...@vger.kernel.org
cc: mp...@lists.linux.dev
cc: net...@vger.kernel.org
cc: rds-de...@oss.oracle.com
cc: tipc-discuss...@lists.sourceforge.net
cc: virtualization@lists.linux-foundation.org
---
 Documentation/networking/scaling.rst |   4 +-
 crypto/af_alg.c  |  29 --
 crypto/algif_aead.c  |  22 +
 crypto/algif_rng.c   |   2 -
 crypto/algif_skcipher.c  |  14 ---
 include/linux/net.h  |   8 --
 include/net/inet_common.h|   2 -
 include/net/sock.h   |   6 --
 net/appletalk/ddp.c  |   1 -
 net/atm/pvc.c|   1 -
 net/atm/svc.c|   1 -
 net/ax25/af_ax25.c   |   1 -
 net/caif/caif_socket.c   |   2 -
 net/can/bcm.c|   1 -
 net/can/isotp.c  |   1 -
 net/can/j1939/socket.c   |   1 -
 net/can/raw.c|   1 -
 net/core/sock.c  |  35 +--
 net/dccp/ipv4.c  |   1 -
 net/dccp/ipv6.c  |   1 -
 net/ieee802154/socket.c  |   2 -
 net/ipv4/af_inet.c   |  21 
 net/ipv4/tcp.c   |  34 ---
 net/ipv4/tcp_bpf.c   |  21 +---
 net/ipv4/tcp_ipv4.c  |   1 -
 net/ipv4/udp.c   |  22 -
 net/ipv4/udp_impl.h  |   2 -
 net/ipv4/udplite.c   |   1 -
 net/ipv6/af_inet6.c  |   3 -
 net/ipv6/raw.c   |   1 -
 net/ipv6/tcp_ipv6.c  |   1 -
 net/key/af_key.c |   1 -
 net/l2tp/l2tp_ip.c   |   1 -
 net/l2tp/l2tp_ip6.c  |   1 -
 net/llc/af_llc.c |   1 -
 net/mctp/af_mctp.c   |   1 -
 net/mptcp/protocol.c |   2 -
 net/netlink/af_netlink.c |   1 -
 net/netrom/af_netrom.c   |   1 -
 net/packet/af_packet.c   |   2 -
 net/phonet/socket.c  |   2 -
 net/qrtr/af_qrtr.c   |   1 -
 net/rds/af_rds.c |   1 -
 net/rose/af_rose.c   |   1 -
 net/rxrpc/af_rxrpc.c |   1 -
 net/sctp/protocol.c  |   1 -
 net/socket.c |  48 -
 net/tipc/socket.c|   3 -
 net/unix/af_unix.c   | 139 ---
 net/vmw_vsock/af_vsock.c |   3 -
 net/x25/af_x25.c |   1 -
 net/xdp/xsk.c|   1 -
 52 files changed, 9 insertions(+), 447 deletions(-)

diff --git a/Documentation/networking/scaling.rst 
b/Documentation/networking/scaling.rst
index 3d435caa3ef2..92c9fb46d6a2 100644
--- a/Documentation/networking/scaling.rst
+++ b/Documentation/networking/scaling.rst
@@ -269,8 +269,8 @@ a single application thread handles flows with many 
different flow hashes.
 rps_sock_flow_table is a global flow table that contains the *desired* CPU
 for flows: the CPU that is currently processing the flow in userspace.
 Each table value is a CPU index that is updated during calls to recvmsg
-and sendmsg (specifically, inet_recvmsg(), inet_sendmsg(), inet_sendpage()
-and tcp_splice_read()).
+and sendmsg (specifically, inet_recvmsg(), inet_sendmsg() and
+tcp_splice_read()).
 
 When the scheduler moves a thread to a new CPU while it has outstanding
 receive packets on the old CPU, packets may arrive out of order. To
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 686610a4986f..9f84816dcabf 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -483,7 +483,6 @@ static const struct proto_ops alg_proto_ops = {
.listen =   sock_no_listen,
.shutdown   =   sock_no_shutdown,
.mmap   =   sock_no_mmap,
-   .sendpage   =   sock_no_sendpage,
.sendmsg=   sock_no_sendmsg,
.recvmsg=   sock_no_recvmsg,
 
@@ -1110,34 +1109,6 @@ int af_alg_sendmsg(struct socket *sock, 

Re: [RFC PATCH 0/4] eBPF RSS through QMP support.

2023-03-29 Thread Xuan Zhuo
Is this a patch-set of QEMU? If yes, why are the email lists all kernel mail
list without QEMU mail list?

Thanks.

On Wed, 29 Mar 2023 13:45:41 +0300, Andrew Melnychenko  
wrote:
> This series of patches provides the ability to retrieve eBPF program
> through qmp, so management application may load bpf blob with proper 
> capabilities.
> Now, virtio-net devices can accept eBPF programs and maps through properties
> as external file descriptors. Access to the eBPF map is direct through mmap()
> call, so it should not require additional capabilities to bpf* calls.
> eBPF file descriptors can be passed to QEMU from parent process or by unix
> socket with sendfd() qmp command.
>
> Overall, the basic scenario of using the helper looks like this:
>  * Libvirt checks for ebpf_fds property.
>  * Libvirt requests eBPF blob through QMP.
>  * Libvirt loads blob for virtio-net.
>  * Libvirt launches the QEMU with eBPF fds passed.
>
> Andrew Melnychenko (4):
>   ebpf: Added eBPF initialization by fds and map update.
>   virtio-net: Added property to load eBPF RSS with fds.
>   ebpf: Added declaration/initialization routines.
>   qmp: Added new command to retrieve eBPF blob.
>
>  ebpf/ebpf.c|  48 +
>  ebpf/ebpf.h|  25 +++
>  ebpf/ebpf_rss-stub.c   |   6 ++
>  ebpf/ebpf_rss.c| 124 +++--
>  ebpf/ebpf_rss.h|  10 +++
>  ebpf/meson.build   |   1 +
>  hw/net/virtio-net.c|  77 ++--
>  include/hw/virtio/virtio-net.h |   1 +
>  monitor/qmp-cmds.c |  17 +
>  qapi/misc.json |  25 +++
>  10 files changed, 307 insertions(+), 27 deletions(-)
>  create mode 100644 ebpf/ebpf.c
>  create mode 100644 ebpf/ebpf.h
>
> --
> 2.39.1
>
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v4] virtio_ring: interrupt disable flag updated to vq even with event_triggered is set

2023-03-29 Thread Xuan Zhuo
On Wed, 29 Mar 2023 18:23:00 +0800, Albert Huang 
 wrote:
> From: "huangjie.albert" 
>
> in virtio_net, if we disable the napi_tx, when we trigger a tx interrupt,
> the vq->event_triggered will be set to true. It will no longer be set to
> false. Unless we explicitly call virtqueue_enable_cb_delayed or
> virtqueue_enable_cb_prepare.
>
> If we disable the napi_tx, it will only be called when the tx ring
> buffer is relatively small.
>
> Because event_triggered is true. Therefore, VRING_AVAIL_F_NO_INTERRUPT or
> VRING_PACKED_EVENT_FLAG_DISABLE will not be set. So we update
> vring_used_event(>split.vring) or vq->packed.vring.driver->off_wrap
> every time we call virtqueue_get_buf_ctx.This bring more interruptions.
>
> To summarize:
> 1) event_triggered was set to true in vring_interrupt()
> 2) after this nothing will happen for virtqueue_disable_cb() so
>VRING_AVAIL_F_NO_INTERRUPT is not set in avail_flags_shadow
> 3) virtqueue_get_buf_ctx_split() will still think the cb is enabled
>then it tries to publish new event
>
> To fix:
> update VRING_AVAIL_F_NO_INTERRUPT or VRING_PACKED_EVENT_FLAG_DISABLE to vq
> when we call virtqueue_disable_cb even the event_triggered is set to true.
>
> Tested with iperf:
> iperf3 tcp stream:
> vm1 -> vm2
> vm2 just receives tcp data stream from vm1, and sends the ack to vm1,
> there are many tx interrupts in vm2.
> but without event_triggered there are just a few tx interrupts.
>
> v2->v3:
> -update the interrupt disable flag even with the event_triggered is set,
> -instead of checking whether event_triggered is set in
> -virtqueue_get_buf_ctx_{packed/split}, will cause the drivers  which have
> -not called virtqueue_{enable/disable}_cb to miss notifications.
>
> v3->v4:
> -remove change for
> -"if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE)"
> -in virtqueue_disable_cb_packed
>
> Fixes: 8d622d21d248 ("virtio: fix up virtio_disable_cb")

Hi Michael,

I want to know what the purpose of this patch ("virtio: fix up 
virtio_disable_cb")
is. I re-reviewed this patch but didn't understand the purpose of these
patches. Does it reduce one write to vring_used_event(&vq->split.vring)?

Thanks.


> Signed-off-by: huangjie.albert 
> Signed-off-by: Michael S. Tsirkin 
> Signed-off-by: Jason Wang 
> ---
>  drivers/virtio/virtio_ring.c | 22 --
>  1 file changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index cbeeea1b0439..ec7ab8e04846 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -931,6 +931,14 @@ static void virtqueue_disable_cb_split(struct virtqueue 
> *_vq)
>
>   if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
>   vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> +
> + /*
> +  * If device triggered an event already it won't trigger one 
> again:
> +  * no need to disable.
> +  */
> + if (vq->event_triggered)
> + return;
> +
>   if (vq->event)
>   /* TODO: this is a hack. Figure out a cleaner value to 
> write. */
>   vring_used_event(>split.vring) = 0x0;
> @@ -1761,6 +1769,14 @@ static void virtqueue_disable_cb_packed(struct 
> virtqueue *_vq)
>
>   if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
>   vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
> +
> + /*
> +  * If device triggered an event already it won't trigger one 
> again:
> +  * no need to disable.
> +  */
> + if (vq->event_triggered)
> + return;
> +
>   vq->packed.vring.driver->flags =
>   cpu_to_le16(vq->packed.event_flags_shadow);
>   }
> @@ -2462,12 +2478,6 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
>  {
>   struct vring_virtqueue *vq = to_vvq(_vq);
>
> - /* If device triggered an event already it won't trigger one again:
> -  * no need to disable.
> -  */
> - if (vq->event_triggered)
> - return;
> -
>   if (vq->packed_ring)
>   virtqueue_disable_cb_packed(_vq);
>   else
> --
> 2.20.1
>
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[RFC PATCH 5/5] ebpf: Updated eBPF program and skeleton.

2023-03-29 Thread Andrew Melnychenko
Updated the section name, so libbpf should init/guess the proper
program type without specifications during open/load.

Signed-off-by: Andrew Melnychenko 
---
 ebpf/rss.bpf.skeleton.h | 1469 ---
 tools/ebpf/rss.bpf.c|2 +-
 2 files changed, 741 insertions(+), 730 deletions(-)

diff --git a/ebpf/rss.bpf.skeleton.h b/ebpf/rss.bpf.skeleton.h
index 18eb2adb12..41b84aea44 100644
--- a/ebpf/rss.bpf.skeleton.h
+++ b/ebpf/rss.bpf.skeleton.h
@@ -176,162 +176,162 @@ err:
 
 static inline const void *rss_bpf__elf_bytes(size_t *sz)
 {
-   *sz = 20440;
+   *sz = 20720;
return (const void *)"\
 \x7f\x45\x4c\x46\x02\x01\x01\0\0\0\0\0\0\0\0\0\x01\0\xf7\0\x01\0\0\0\0\0\0\0\0\
-\0\0\0\0\0\0\0\0\0\0\0\x98\x4c\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0d\0\
-\x01\0\xbf\x19\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\x54\xff\0\0\0\0\xbf\xa7\
-\0\0\0\0\0\0\x07\x07\0\0\x54\xff\xff\xff\x18\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\xb0\x4d\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0d\0\
+\x01\0\xbf\x19\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\x4c\xff\0\0\0\0\xbf\xa7\
+\0\0\0\0\0\0\x07\x07\0\0\x4c\xff\xff\xff\x18\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
 \xbf\x72\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\xbf\x06\0\0\0\0\0\0\x18\x01\0\0\0\0\0\
 \0\0\0\0\0\0\0\0\0\xbf\x72\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\xbf\x08\0\0\0\0\0\0\
-\x18\0\0\0\xff\xff\xff\xff\0\0\0\0\0\0\0\0\x15\x06\x67\x02\0\0\0\0\xbf\x87\0\0\
-\0\0\0\0\x15\x07\x65\x02\0\0\0\0\x71\x61\0\0\0\0\0\0\x55\x01\x01\0\0\0\0\0\x05\
-\0\x5e\x02\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xc8\xff\0\0\0\0\x7b\x1a\xc0\xff\
-\0\0\0\0\x7b\x1a\xb8\xff\0\0\0\0\x7b\x1a\xb0\xff\0\0\0\0\x7b\x1a\xa8\xff\0\0\0\
-\0\x63\x1a\xa0\xff\0\0\0\0\x7b\x1a\x98\xff\0\0\0\0\x7b\x1a\x90\xff\0\0\0\0\x7b\
-\x1a\x88\xff\0\0\0\0\x7b\x1a\x80\xff\0\0\0\0\x7b\x1a\x78\xff\0\0\0\0\x7b\x1a\
-\x70\xff\0\0\0\0\x7b\x1a\x68\xff\0\0\0\0\x7b\x1a\x60\xff\0\0\0\0\x7b\x1a\x58\
-\xff\0\0\0\0\x15\x09\x4d\x02\0\0\0\0\x6b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\
-\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x91\0\0\0\0\0\0\xb7\x02\0\0\x0c\0\0\0\xb7\
+\x18\0\0\0\xff\xff\xff\xff\0\0\0\0\0\0\0\0\x15\x06\x64\x02\0\0\0\0\xbf\x87\0\0\
+\0\0\0\0\x15\x07\x62\x02\0\0\0\0\x71\x61\0\0\0\0\0\0\x55\x01\x01\0\0\0\0\0\x05\
+\0\x5b\x02\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xc0\xff\0\0\0\0\x7b\x1a\xb8\xff\
+\0\0\0\0\x7b\x1a\xb0\xff\0\0\0\0\x7b\x1a\xa8\xff\0\0\0\0\x7b\x1a\xa0\xff\0\0\0\
+\0\x63\x1a\x98\xff\0\0\0\0\x7b\x1a\x90\xff\0\0\0\0\x7b\x1a\x88\xff\0\0\0\0\x7b\
+\x1a\x80\xff\0\0\0\0\x7b\x1a\x78\xff\0\0\0\0\x7b\x1a\x70\xff\0\0\0\0\x7b\x1a\
+\x68\xff\0\0\0\0\x7b\x1a\x60\xff\0\0\0\0\x7b\x1a\x58\xff\0\0\0\0\x7b\x1a\x50\
+\xff\0\0\0\0\x15\x09\x4a\x02\0\0\0\0\x6b\x1a\xc8\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\
+\0\x07\x03\0\0\xc8\xff\xff\xff\xbf\x91\0\0\0\0\0\0\xb7\x02\0\0\x0c\0\0\0\xb7\
 \x04\0\0\x02\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x44\0\0\0\x67\0\0\0\x20\0\0\0\
-\x77\0\0\0\x20\0\0\0\x55\0\x42\x02\0\0\0\0\xb7\x02\0\0\x10\0\0\0\x69\xa1\xd0\
+\x77\0\0\0\x20\0\0\0\x55\0\x3f\x02\0\0\0\0\xb7\x02\0\0\x10\0\0\0\x69\xa1\xc8\
 \xff\0\0\0\0\xbf\x13\0\0\0\0\0\0\xdc\x03\0\0\x10\0\0\0\x15\x03\x02\0\0\x81\0\0\
 \x55\x03\x0b\0\xa8\x88\0\0\xb7\x02\0\0\x14\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\
-\0\xd0\xff\xff\xff\xbf\x91\0\0\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\0\0\0\
-\0\x85\0\0\0\x44\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\x32\x02\0\
-\0\0\0\x69\xa1\xd0\xff\0\0\0\0\x15\x01\x30\x02\0\0\0\0\x7b\x7a\x38\xff\0\0\0\0\
-\x7b\x9a\x40\xff\0\0\0\0\x15\x01\x55\0\x86\xdd\0\0\x55\x01\x39\0\x08\0\0\0\xb7\
-\x07\0\0\x01\0\0\0\x73\x7a\x58\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xe0\xff\
-\0\0\0\0\x7b\x1a\xd8\xff\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\0\
-\x07\x03\0\0\xd0\xff\xff\xff\x79\xa1\x40\xff\0\0\0\0\xb7\x02\0\0\0\0\0\0\xb7\
+\0\xc8\xff\xff\xff\xbf\x91\0\0\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\0\0\0\
+\0\x85\0\0\0\x44\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\x2f\x02\0\
+\0\0\0\x69\xa1\xc8\xff\0\0\0\0\x15\x01\x2d\x02\0\0\0\0\x7b\x7a\x30\xff\0\0\0\0\
+\x7b\x9a\x38\xff\0\0\0\0\x15\x01\x55\0\x86\xdd\0\0\x55\x01\x39\0\x08\0\0\0\xb7\
+\x07\0\0\x01\0\0\0\x73\x7a\x50\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xd8\xff\
+\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\x7b\x1a\xc8\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\0\
+\x07\x03\0\0\xc8\xff\xff\xff\x79\xa1\x38\xff\0\0\0\0\xb7\x02\0\0\0\0\0\0\xb7\
 \x04\0\0\x14\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\x67\0\0\0\x20\0\0\
-\0\x77\0\0\0\x20\0\0\0\x55\0\x1c\x02\0\0\0\0\x69\xa1\xd6\xff\0\0\0\0\x55\x01\
-\x01\0\0\0\0\0\xb7\x07\0\0\0\0\0\0\x61\xa1\xdc\xff\0\0\0\0\x63\x1a\x64\xff\0\0\
-\0\0\x61\xa1\xe0\xff\0\0\0\0\x63\x1a\x68\xff\0\0\0\0\x71\xa9\xd9\xff\0\0\0\0\
-\x73\x7a\x5e\xff\0\0\0\0\x71\xa1\xd0\xff\0\0\0\0\x67\x01\0\0\x02\0\0\0\x57\x01\
-\0\0\x3c\0\0\0\x7b\x1a\x48\xff\0\0\0\0\xbf\x91\0\0\0\0\0\0\x57\x01\0\0\xff\0\0\
+\0\x77\0\0\0\x20\0\0\0\x55\0\x19\x02\0\0\0\0\x69\xa1\xce\xff\0\0\0\0\x55\x01\
+\x01\0\0\0\0\0\xb7\x07\0\0\0\0\0\0\x61\xa1\xd4\xff\0\0\0\0\x63\x1a\x5c\xff\0\0\

[RFC PATCH 0/4] eBPF RSS through QMP support.

2023-03-29 Thread Andrew Melnychenko
This series of patches provides the ability to retrieve the eBPF program
through qmp, so a management application may load the bpf blob with proper
capabilities.
Now, virtio-net devices can accept eBPF programs and maps through properties
as external file descriptors. Access to the eBPF map is direct through mmap()
call, so it should not require additional capabilities to bpf* calls.
eBPF file descriptors can be passed to QEMU from parent process or by unix
socket with sendfd() qmp command.

Overall, the basic scenario of using the helper looks like this:
 * Libvirt checks for ebpf_fds property.
 * Libvirt requests eBPF blob through QMP.
 * Libvirt loads blob for virtio-net.
 * Libvirt launches the QEMU with eBPF fds passed.
 
Andrew Melnychenko (4):
  ebpf: Added eBPF initialization by fds and map update.
  virtio-net: Added property to load eBPF RSS with fds.
  ebpf: Added declaration/initialization routines.
  qmp: Added new command to retrieve eBPF blob.

 ebpf/ebpf.c|  48 +
 ebpf/ebpf.h|  25 +++
 ebpf/ebpf_rss-stub.c   |   6 ++
 ebpf/ebpf_rss.c| 124 +++--
 ebpf/ebpf_rss.h|  10 +++
 ebpf/meson.build   |   1 +
 hw/net/virtio-net.c|  77 ++--
 include/hw/virtio/virtio-net.h |   1 +
 monitor/qmp-cmds.c |  17 +
 qapi/misc.json |  25 +++
 10 files changed, 307 insertions(+), 27 deletions(-)
 create mode 100644 ebpf/ebpf.c
 create mode 100644 ebpf/ebpf.h

-- 
2.39.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[RFC PATCH 4/5] qmp: Added new command to retrieve eBPF blob.

2023-03-29 Thread Andrew Melnychenko
Added the command "request-ebpf". This command returns
the eBPF program encoded in base64. The program is taken from the
skeleton and is essentially an ELF object that can be
loaded in the future with libbpf.

Signed-off-by: Andrew Melnychenko 
---
 monitor/qmp-cmds.c | 17 +
 qapi/misc.json | 25 +
 2 files changed, 42 insertions(+)

diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
index b0f948d337..8f2fc3e7ec 100644
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -32,6 +32,7 @@
 #include "hw/mem/memory-device.h"
 #include "hw/intc/intc.h"
 #include "hw/rdma/rdma.h"
+#include "ebpf/ebpf.h"
 
 NameInfo *qmp_query_name(Error **errp)
 {
@@ -209,3 +210,19 @@ static void __attribute__((__constructor__)) 
monitor_init_qmp_commands(void)
  qmp_marshal_qmp_capabilities,
  QCO_ALLOW_PRECONFIG, 0);
 }
+
+EbpfObject *qmp_request_ebpf(const char *id, Error **errp)
+{
+EbpfObject *ret = NULL;
+size_t size = 0;
+const guchar *data = ebpf_find_binary_by_id(id, );
+
+if (data) {
+ret = g_new0(EbpfObject, 1);
+ret->object = g_base64_encode(data, size);
+} else {
+error_setg(errp, "can't find eBPF object with id: %s", id);
+}
+
+return ret;
+}
diff --git a/qapi/misc.json b/qapi/misc.json
index 6ddd16ea28..4689802460 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -618,3 +618,28 @@
 { 'event': 'VFU_CLIENT_HANGUP',
   'data': { 'vfu-id': 'str', 'vfu-qom-path': 'str',
 'dev-id': 'str', 'dev-qom-path': 'str' } }
+
+##
+# @EbpfObject:
+#
+# Structure that holds eBPF ELF object encoded in base64.
+##
+{ 'struct': 'EbpfObject',
+  'data': {'object': 'str'} }
+
+##
+# @request-ebpf:
+#
+# Function returns eBPF object that can be loaded with libbpf.
+# Management applications (g.e. libvirt) may load it and pass file
+# descriptors to QEMU. Which allows running QEMU without BPF capabilities.
+#
+# Returns: RSS eBPF object encoded in base64.
+#
+# Since: 7.3
+#
+##
+{ 'command': 'request-ebpf',
+  'data': { 'id': 'str' },
+  'returns': 'EbpfObject' }
+
-- 
2.39.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[RFC PATCH 2/5] virtio-net: Added property to load eBPF RSS with fds.

2023-03-29 Thread Andrew Melnychenko
The eBPF RSS program and maps may now be passed during initialization.
This was initially implemented so libvirt could launch qemu without
permissions, with the eBPF program initialized through the helper.

Signed-off-by: Andrew Melnychenko 
---
 hw/net/virtio-net.c| 77 --
 include/hw/virtio/virtio-net.h |  1 +
 2 files changed, 74 insertions(+), 4 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 53e1c32643..45d448a83d 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -42,6 +42,7 @@
 #include "sysemu/sysemu.h"
 #include "trace.h"
 #include "monitor/qdev.h"
+#include "monitor/monitor.h"
 #include "hw/pci/pci_device.h"
 #include "net_rx_pkt.h"
 #include "hw/virtio/vhost.h"
@@ -1305,14 +1306,81 @@ static void virtio_net_detach_epbf_rss(VirtIONet *n)
 virtio_net_attach_ebpf_to_backend(n->nic, -1);
 }
 
-static bool virtio_net_load_ebpf(VirtIONet *n)
+static int virtio_net_get_ebpf_rss_fds(char *str, char *fds[], int nfds)
+{
+char *ptr = str;
+char *cur = NULL;
+size_t len = strlen(str);
+int i = 0;
+
+for (; i < nfds && ptr < str + len;) {
+cur = strchr(ptr, ':');
+
+if (cur == NULL) {
+fds[i] = g_strdup(ptr);
+} else {
+fds[i] = g_strndup(ptr, cur - ptr);
+}
+
+i++;
+if (cur == NULL) {
+break;
+} else {
+ptr = cur + 1;
+}
+}
+
+return i;
+}
+
+static bool virtio_net_load_ebpf_fds(VirtIONet *n)
 {
-if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
-/* backend does't support steering ebpf */
+char *fds_strs[EBPF_RSS_MAX_FDS];
+int fds[EBPF_RSS_MAX_FDS];
+int nfds;
+int ret = false;
+Error *errp;
+int i = 0;
+
+if (n == NULL || !n->ebpf_rss_fds) {
 return false;
 }
 
-return ebpf_rss_load(>ebpf_rss);
+nfds = virtio_net_get_ebpf_rss_fds(n->ebpf_rss_fds,
+   fds_strs, EBPF_RSS_MAX_FDS);
+for (i = 0; i < nfds; i++) {
+fds[i] = monitor_fd_param(monitor_cur(), fds_strs[i], );
+}
+
+if (nfds == EBPF_RSS_MAX_FDS) {
+ret = ebpf_rss_load_fds(>ebpf_rss, fds[0], fds[1], fds[2], fds[3]);
+}
+
+if (!ret) {
+for (i = 0; i < nfds; i++) {
+close(fds[i]);
+}
+}
+
+for (i = 0; i < nfds; i++) {
+g_free(fds_strs[i]);
+}
+
+return ret;
+}
+
+static bool virtio_net_load_ebpf(VirtIONet *n)
+{
+bool ret = true;
+
+if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
+if (!(n->ebpf_rss_fds
+&& virtio_net_load_ebpf_fds(n))) {
+ret = ebpf_rss_load(>ebpf_rss);
+}
+}
+
+return ret;
 }
 
 static void virtio_net_unload_ebpf(VirtIONet *n)
@@ -3900,6 +3968,7 @@ static Property virtio_net_properties[] = {
 VIRTIO_NET_F_RSS, false),
 DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
 VIRTIO_NET_F_HASH_REPORT, false),
+DEFINE_PROP_STRING("ebpf_rss_fds", VirtIONet, ebpf_rss_fds),
 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
 VIRTIO_NET_F_RSC_EXT, false),
 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index ef234ffe7e..e10ce88f91 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -219,6 +219,7 @@ struct VirtIONet {
 VirtioNetRssData rss_data;
 struct NetRxPkt *rx_pkt;
 struct EBPFRSSContext ebpf_rss;
+char *ebpf_rss_fds;
 };
 
 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
-- 
2.39.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[RFC PATCH 3/5] ebpf: Added declaration/initialization routines.

2023-03-29 Thread Andrew Melnychenko
Now, the binary objects may be retrieved by id/name.
This will be required for future qmp commands that may need a specific
eBPF blob.

Signed-off-by: Andrew Melnychenko 
---
 ebpf/ebpf.c  | 48 
 ebpf/ebpf.h  | 25 +
 ebpf/ebpf_rss.c  |  4 
 ebpf/meson.build |  1 +
 4 files changed, 78 insertions(+)
 create mode 100644 ebpf/ebpf.c
 create mode 100644 ebpf/ebpf.h

diff --git a/ebpf/ebpf.c b/ebpf/ebpf.c
new file mode 100644
index 00..86320d72f5
--- /dev/null
+++ b/ebpf/ebpf.c
@@ -0,0 +1,48 @@
+/*
+ * QEMU eBPF binary declaration routine.
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ *  Andrew Melnychenko 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/queue.h"
+#include "ebpf/ebpf.h"
+
+struct ElfBinaryDataEntry {
+const char *id;
+const void * (*fn)(size_t *);
+
+QSLIST_ENTRY(ElfBinaryDataEntry) node;
+};
+
+static QSLIST_HEAD(, ElfBinaryDataEntry) ebpf_elf_obj_list =
+QSLIST_HEAD_INITIALIZER();
+
+void ebpf_register_binary_data(const char *id, const void * (*fn)(size_t *))
+{
+struct ElfBinaryDataEntry *data = NULL;
+
+data = g_malloc0(sizeof(*data));
+data->fn = fn;
+data->id = id;
+
+QSLIST_INSERT_HEAD(_elf_obj_list, data, node);
+}
+
+const void *ebpf_find_binary_by_id(const char *id, size_t *sz)
+{
+struct ElfBinaryDataEntry *it = NULL;
+QSLIST_FOREACH(it, _elf_obj_list, node) {
+if (strcmp(id, it->id) == 0) {
+return it->fn(sz);
+}
+}
+
+return NULL;
+}
diff --git a/ebpf/ebpf.h b/ebpf/ebpf.h
new file mode 100644
index 00..fd705cb73e
--- /dev/null
+++ b/ebpf/ebpf.h
@@ -0,0 +1,25 @@
+/*
+ * QEMU eBPF binary declaration routine.
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ *  Andrew Melnychenko 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef EBPF_H
+#define EBPF_H
+
+void ebpf_register_binary_data(const char *id, const void * (*fn)(size_t *));
+const void *ebpf_find_binary_by_id(const char *id, size_t *sz);
+
+#define ebpf_binary_init(id, fn)   \
+static void __attribute__((constructor)) ebpf_binary_init_ ## fn(void) \
+{  \
+ebpf_register_binary_data(id, fn); \
+}
+
+#endif /* EBPF_H */
diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
index 08015fecb1..b4038725f2 100644
--- a/ebpf/ebpf_rss.c
+++ b/ebpf/ebpf_rss.c
@@ -21,6 +21,8 @@
 
 #include "ebpf/ebpf_rss.h"
 #include "ebpf/rss.bpf.skeleton.h"
+#include "ebpf/ebpf.h"
+
 #include "trace.h"
 
 void ebpf_rss_init(struct EBPFRSSContext *ctx)
@@ -237,3 +239,5 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
 ctx->obj = NULL;
 ctx->program_fd = -1;
 }
+
+ebpf_binary_init("rss", rss_bpf__elf_bytes)
diff --git a/ebpf/meson.build b/ebpf/meson.build
index 2dd0fd8948..67c3f53aa9 100644
--- a/ebpf/meson.build
+++ b/ebpf/meson.build
@@ -1 +1,2 @@
+softmmu_ss.add(files('ebpf.c'))
 softmmu_ss.add(when: libbpf, if_true: files('ebpf_rss.c'), if_false: 
files('ebpf_rss-stub.c'))
-- 
2.39.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[RFC PATCH 1/5] ebpf: Added eBPF initialization by fds and map update.

2023-03-29 Thread Andrew Melnychenko
Changed eBPF map updates to go through the mmapped array.
Mmapped arrays provide direct access to map data.
This avoids using the bpf_map_update_elem() call,
which may require capabilities that are not present.

Signed-off-by: Andrew Melnychenko 
---
 ebpf/ebpf_rss-stub.c |   6 +++
 ebpf/ebpf_rss.c  | 120 ++-
 ebpf/ebpf_rss.h  |  10 
 3 files changed, 113 insertions(+), 23 deletions(-)

diff --git a/ebpf/ebpf_rss-stub.c b/ebpf/ebpf_rss-stub.c
index e71e229190..8d7fae2ad9 100644
--- a/ebpf/ebpf_rss-stub.c
+++ b/ebpf/ebpf_rss-stub.c
@@ -28,6 +28,12 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
 return false;
 }
 
+bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd,
+   int config_fd, int toeplitz_fd, int table_fd)
+{
+return false;
+}
+
 bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
   uint16_t *indirections_table, uint8_t *toeplitz_key)
 {
diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
index cee658c158..08015fecb1 100644
--- a/ebpf/ebpf_rss.c
+++ b/ebpf/ebpf_rss.c
@@ -27,19 +27,68 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx)
 {
 if (ctx != NULL) {
 ctx->obj = NULL;
+ctx->program_fd = -1;
 }
 }
 
 bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
 {
-return ctx != NULL && ctx->obj != NULL;
+return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
+}
+
+static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
+{
+if (!ebpf_rss_is_loaded(ctx)) {
+return false;
+}
+
+ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
+   PROT_READ | PROT_WRITE, MAP_SHARED,
+   ctx->map_configuration, 0);
+if (ctx->mmap_configuration == MAP_FAILED) {
+trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration array");
+return false;
+}
+ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
+   PROT_READ | PROT_WRITE, MAP_SHARED,
+   ctx->map_toeplitz_key, 0);
+if (ctx->mmap_toeplitz_key == MAP_FAILED) {
+trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
+goto toeplitz_fail;
+}
+ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(),
+   PROT_READ | PROT_WRITE, MAP_SHARED,
+   ctx->map_indirections_table, 0);
+if (ctx->mmap_indirections_table == MAP_FAILED) {
+trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection table");
+goto indirection_fail;
+}
+
+return true;
+
+indirection_fail:
+munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
+toeplitz_fail:
+munmap(ctx->mmap_configuration, qemu_real_host_page_size());
+return false;
+}
+
+static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
+{
+if (!ebpf_rss_is_loaded(ctx)) {
+return;
+}
+
+munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
+munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
+munmap(ctx->mmap_configuration, qemu_real_host_page_size());
 }
 
 bool ebpf_rss_load(struct EBPFRSSContext *ctx)
 {
 struct rss_bpf *rss_bpf_ctx;
 
-if (ctx == NULL) {
+if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
 return false;
 }
 
@@ -66,26 +115,51 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
 ctx->map_toeplitz_key = bpf_map__fd(
 rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
 
+if (!ebpf_rss_mmap(ctx)) {
+goto error;
+}
+
 return true;
 error:
 rss_bpf__destroy(rss_bpf_ctx);
 ctx->obj = NULL;
+ctx->program_fd = -1;
 
 return false;
 }
 
-static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
-struct EBPFRSSConfig *config)
+bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd,
+   int config_fd, int toeplitz_fd, int table_fd)
 {
-uint32_t map_key = 0;
+if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
+return false;
+}
 
-if (!ebpf_rss_is_loaded(ctx)) {
+if (program_fd < 0 || config_fd < 0 || toeplitz_fd < 0 || table_fd < 0) {
 return false;
 }
-if (bpf_map_update_elem(ctx->map_configuration,
-_key, config, 0) < 0) {
+
+ctx->program_fd = program_fd;
+ctx->map_configuration = config_fd;
+ctx->map_toeplitz_key = toeplitz_fd;
+ctx->map_indirections_table = table_fd;
+
+if (!ebpf_rss_mmap(ctx)) {
+ctx->program_fd = -1;
 return false;
 }
+
+return true;
+}
+
+static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
+struct EBPFRSSConfig *config)
+{
+if (!ebpf_rss_is_loaded(ctx)) {
+return false;
+}
+
+memcpy(ctx->mmap_configuration, config, sizeof(*config));
 return true;
 }

Re: [External] Re: [PATCH v3] virtio_ring: interrupt disable flag updated to vq even with event_triggered is set

2023-03-29 Thread Xuan Zhuo
On Wed, 29 Mar 2023 17:33:23 +0800, =?utf-8?b?6buE5p2w?= 
 wrote:
> Xuan Zhuo  于2023年3月29日周三 17:27写道:
> >
> > On Wed, 29 Mar 2023 15:28:41 +0800, Albert Huang 
> >  wrote:
> > > From: "huangjie.albert" 
> > >
> > > in virtio_net, if we disable the napi_tx, when we trigger a tx interrupt,
> > > the vq->event_triggered will be set to true. It will no longer be set to
> > > false. Unless we explicitly call virtqueue_enable_cb_delayed or
> > > virtqueue_enable_cb_prepare.
> > >
> > > If we disable the napi_tx, it will only be called when the tx ring
> > > buffer is relatively small.
> > >
> > > Because event_triggered is true. Therefore, VRING_AVAIL_F_NO_INTERRUPT or
> > > VRING_PACKED_EVENT_FLAG_DISABLE will not be set. So we update
> > > vring_used_event(>split.vring) or vq->packed.vring.driver->off_wrap
> > > every time we call virtqueue_get_buf_ctx. This brings more interruptions.
> > >
> > > To summarize:
> > > 1) event_triggered was set to true in vring_interrupt()
> > > 2) after this nothing will happen for virtqueue_disable_cb() so
> > >VRING_AVAIL_F_NO_INTERRUPT is not set in avail_flags_shadow
> > > 3) virtqueue_get_buf_ctx_split() will still think the cb is enabled
> > >then it tries to publish new event
> > >
> > > To fix:
> > > update VRING_AVAIL_F_NO_INTERRUPT or VRING_PACKED_EVENT_FLAG_DISABLE to vq
> > > when we call virtqueue_disable_cb even the event_triggered is set to true.
> > >
> > > Tested with iperf:
> > > iperf3 tcp stream:
> > > vm1 -> vm2
> > > vm2 just receives tcp data stream from vm1, and sends the ack to vm1,
> > > there are many tx interrupts in vm2.
> > > but without event_triggered there are just a few tx interrupts.
> > >
> > > v2->v3:
> > > -update the interrupt disable flag even with the event_triggered is set,
> > > -instead of checking whether event_triggered is set in
> > > -virtqueue_get_buf_ctx_{packed/split}, will cause the drivers  which have
> > > -not called virtqueue_{enable/disable}_cb to miss notifications.
> > >
> > > Fixes: 8d622d21d248 ("virtio: fix up virtio_disable_cb")
> > > Signed-off-by: huangjie.albert 
> > > Signed-off-by: Michael S. Tsirkin 
> > > Signed-off-by: Jason Wang 
> > > ---
> > >  drivers/virtio/virtio_ring.c | 24 +---
> > >  1 file changed, 17 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 307e139cb11d..ad74463a48ee 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -812,6 +812,14 @@ static void virtqueue_disable_cb_split(struct 
> > > virtqueue *_vq)
> > >
> > >   if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
> > >   vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> > > +
> > > + /*
> > > +  * If device triggered an event already it won't trigger 
> > > one again:
> > > +  * no need to disable.
> > > +  */
> > > + if (vq->event_triggered)
> > > + return;
> > > +
> > >   if (vq->event)
> > >   /* TODO: this is a hack. Figure out a cleaner value 
> > > to write. */
> > >   vring_used_event(>split.vring) = 0x0;
> > > @@ -1544,8 +1552,16 @@ static void virtqueue_disable_cb_packed(struct 
> > > virtqueue *_vq)
> > >  {
> > >   struct vring_virtqueue *vq = to_vvq(_vq);
> > >
> > > - if (vq->packed.event_flags_shadow != 
> > > VRING_PACKED_EVENT_FLAG_DISABLE) {
> > > + if (!(vq->packed.event_flags_shadow & 
> > > VRING_PACKED_EVENT_FLAG_DISABLE)) {
> >
> > This seems to be another problem.
> >
> > Thanks.
> >
>
> Here, we are only making this change to maintain consistency with
> virtqueue_disable_cb_split.
> Is there any concern with doing so?

I'm not sure why the use_flags_shadow use "!=" to judge, but it seems
that all places are used like this. So we'd better keep the original. If it is
necessary to modify it here, I think a separate commit should be posted.
Even if it is just to keep it consistent with split.


Thanks.



>
> Thanks.
>
> >
> > >   vq->packed.event_flags_shadow = 
> > > VRING_PACKED_EVENT_FLAG_DISABLE;
> > > +
> > > + /*
> > > +  * If device triggered an event already it won't trigger 
> > > one again:
> > > +  * no need to disable.
> > > +  */
> > > + if (vq->event_triggered)
> > > + return;
> > > +
> > >   vq->packed.vring.driver->flags =
> > >   cpu_to_le16(vq->packed.event_flags_shadow);
> > >   }
> > > @@ -2063,12 +2079,6 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
> > >  {
> > >   struct vring_virtqueue *vq = to_vvq(_vq);
> > >
> > > - /* If device triggered an event already it won't trigger one again:
> > > -  * no need to disable.
> > > -  */
> > > - if (vq->event_triggered)
> > > - return;
> > > 

Re: [PATCH v3] virtio_ring: interrupt disable flag updated to vq even with event_triggered is set

2023-03-29 Thread Xuan Zhuo
On Wed, 29 Mar 2023 15:28:41 +0800, Albert Huang 
 wrote:
> From: "huangjie.albert" 
>
> in virtio_net, if we disable the napi_tx, when we trigger a tx interrupt,
> the vq->event_triggered will be set to true. It will no longer be set to
> false. Unless we explicitly call virtqueue_enable_cb_delayed or
> virtqueue_enable_cb_prepare.
>
> If we disable the napi_tx, it will only be called when the tx ring
> buffer is relatively small.
>
> Because event_triggered is true. Therefore, VRING_AVAIL_F_NO_INTERRUPT or
> VRING_PACKED_EVENT_FLAG_DISABLE will not be set. So we update
> vring_used_event(>split.vring) or vq->packed.vring.driver->off_wrap
> every time we call virtqueue_get_buf_ctx. This brings more interruptions.
>
> To summarize:
> 1) event_triggered was set to true in vring_interrupt()
> 2) after this nothing will happen for virtqueue_disable_cb() so
>VRING_AVAIL_F_NO_INTERRUPT is not set in avail_flags_shadow
> 3) virtqueue_get_buf_ctx_split() will still think the cb is enabled
>then it tries to publish new event
>
> To fix:
> update VRING_AVAIL_F_NO_INTERRUPT or VRING_PACKED_EVENT_FLAG_DISABLE to vq
> when we call virtqueue_disable_cb even the event_triggered is set to true.
>
> Tested with iperf:
> iperf3 tcp stream:
> vm1 -> vm2
> vm2 just receives tcp data stream from vm1, and sends the ack to vm1,
> there are many tx interrupts in vm2.
> but without event_triggered there are just a few tx interrupts.
>
> v2->v3:
> -update the interrupt disable flag even with the event_triggered is set,
> -instead of checking whether event_triggered is set in
> -virtqueue_get_buf_ctx_{packed/split}, will cause the drivers  which have
> -not called virtqueue_{enable/disable}_cb to miss notifications.
>
> Fixes: 8d622d21d248 ("virtio: fix up virtio_disable_cb")
> Signed-off-by: huangjie.albert 
> Signed-off-by: Michael S. Tsirkin 
> Signed-off-by: Jason Wang 
> ---
>  drivers/virtio/virtio_ring.c | 24 +---
>  1 file changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 307e139cb11d..ad74463a48ee 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -812,6 +812,14 @@ static void virtqueue_disable_cb_split(struct virtqueue 
> *_vq)
>
>   if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
>   vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> +
> + /*
> +  * If device triggered an event already it won't trigger one 
> again:
> +  * no need to disable.
> +  */
> + if (vq->event_triggered)
> + return;
> +
>   if (vq->event)
>   /* TODO: this is a hack. Figure out a cleaner value to 
> write. */
>   vring_used_event(>split.vring) = 0x0;
> @@ -1544,8 +1552,16 @@ static void virtqueue_disable_cb_packed(struct 
> virtqueue *_vq)
>  {
>   struct vring_virtqueue *vq = to_vvq(_vq);
>
> - if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
> + if (!(vq->packed.event_flags_shadow & VRING_PACKED_EVENT_FLAG_DISABLE)) 
> {

This seems to be another problem.

Thanks.


>   vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
> +
> + /*
> +  * If device triggered an event already it won't trigger one 
> again:
> +  * no need to disable.
> +  */
> + if (vq->event_triggered)
> + return;
> +
>   vq->packed.vring.driver->flags =
>   cpu_to_le16(vq->packed.event_flags_shadow);
>   }
> @@ -2063,12 +2079,6 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
>  {
>   struct vring_virtqueue *vq = to_vvq(_vq);
>
> - /* If device triggered an event already it won't trigger one again:
> -  * no need to disable.
> -  */
> - if (vq->event_triggered)
> - return;
> -
>   if (vq->packed_ring)
>   virtqueue_disable_cb_packed(_vq);
>   else
> --
> 2.31.1
>
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v3] virtio_ring: interrupt disable flag updated to vq even with event_triggered is set

2023-03-29 Thread Xuan Zhuo
Maybe one new thread is better.

Thanks.

On Wed, 29 Mar 2023 15:21:35 +0800, Albert Huang 
 wrote:
> From: "huangjie.albert" 
>
> in virtio_net, if we disable the napi_tx, when we trigger a tx interrupt,
> the vq->event_triggered will be set to true. It will no longer be set to
> false. Unless we explicitly call virtqueue_enable_cb_delayed or
> virtqueue_enable_cb_prepare.
>
> If we disable the napi_tx, it will only be called when the tx ring
> buffer is relatively small.
>
> Because event_triggered is true. Therefore, VRING_AVAIL_F_NO_INTERRUPT or
> VRING_PACKED_EVENT_FLAG_DISABLE will not be set. So we update
> vring_used_event(>split.vring) or vq->packed.vring.driver->off_wrap
> every time we call virtqueue_get_buf_ctx. This brings more interruptions.
>
> To summarize:
> 1) event_triggered was set to true in vring_interrupt()
> 2) after this nothing will happen for virtqueue_disable_cb() so
>VRING_AVAIL_F_NO_INTERRUPT is not set in avail_flags_shadow
> 3) virtqueue_get_buf_ctx_split() will still think the cb is enabled
>then it tries to publish new event
>
> To fix:
> update VRING_AVAIL_F_NO_INTERRUPT or VRING_PACKED_EVENT_FLAG_DISABLE to vq
> when we call virtqueue_disable_cb even the event_triggered is set to true.
>
> Tested with iperf:
> iperf3 tcp stream:
> vm1 -> vm2
> vm2 just receives tcp data stream from vm1, and sends the ack to vm1,
> there are many tx interrupts in vm2.
> but without event_triggered there are just a few tx interrupts.
>
> v2->v3:
> -update the interrupt disable flag even with the event_triggered is set,
> -instead of checking whether event_triggered is set in
> -virtqueue_get_buf_ctx_{packed/split}, will cause the drivers  which have
> -not called virtqueue_{enable/disable}_cb to miss notifications.
>
> Fixes: 8d622d21d248 ("virtio: fix up virtio_disable_cb")
> Signed-off-by: huangjie.albert 
> Signed-off-by: Michael S. Tsirkin 
> Signed-off-by: Jason Wang 
> ---
>  drivers/virtio/virtio_ring.c | 24 +---
>  1 file changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 307e139cb11d..ad74463a48ee 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -812,6 +812,14 @@ static void virtqueue_disable_cb_split(struct virtqueue 
> *_vq)
>
>   if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
>   vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
> +
> + /*
> +  * If device triggered an event already it won't trigger one 
> again:
> +  * no need to disable.
> +  */
> + if (vq->event_triggered)
> + return;
> +
>   if (vq->event)
>   /* TODO: this is a hack. Figure out a cleaner value to 
> write. */
>   vring_used_event(>split.vring) = 0x0;
> @@ -1544,8 +1552,16 @@ static void virtqueue_disable_cb_packed(struct 
> virtqueue *_vq)
>  {
>   struct vring_virtqueue *vq = to_vvq(_vq);
>
> - if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
> + if (!(vq->packed.event_flags_shadow & VRING_PACKED_EVENT_FLAG_DISABLE)) 
> {
>   vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
> +
> + /*
> +  * If device triggered an event already it won't trigger one 
> again:
> +  * no need to disable.
> +  */
> + if (vq->event_triggered)
> + return;
> +
>   vq->packed.vring.driver->flags =
>   cpu_to_le16(vq->packed.event_flags_shadow);
>   }
> @@ -2063,12 +2079,6 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
>  {
>   struct vring_virtqueue *vq = to_vvq(_vq);
>
> - /* If device triggered an event already it won't trigger one again:
> -  * no need to disable.
> -  */
> - if (vq->event_triggered)
> - return;
> -
>   if (vq->packed_ring)
>   virtqueue_disable_cb_packed(_vq);
>   else
> --
> 2.31.1
>
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH net v2] virtio/vsock: fix leaks due to missing skb owner

2023-03-29 Thread Stefano Garzarella

On Tue, Mar 28, 2023 at 04:29:09PM +, Bobby Eshleman wrote:

This patch sets the skb owner in the recv and send path for virtio.

For the send path, this solves the leak caused when
virtio_transport_purge_skbs() finds skb->sk is always NULL and therefore
never matches it with the current socket. Setting the owner upon
allocation fixes this.

For the recv path, this ensures correctness of accounting and also
correct transfer of ownership in vsock_loopback (when skbs are sent from
one socket and received by another).

Fixes: 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff")
Signed-off-by: Bobby Eshleman 
Reported-by: Cong Wang 
Link: https://lore.kernel.org/all/ZCCbATwov4U+GBUv@pop-os.localdomain/
---
Changes in v2:
- virtio/vsock: add skb_set_owner_r to recv_pkt()
- Link to v1: 
https://lore.kernel.org/r/20230327-vsock-fix-leak-v1-1-3fede3671...@bytedance.com
---
net/vmw_vsock/virtio_transport_common.c | 5 +
1 file changed, 5 insertions(+)

diff --git a/net/vmw_vsock/virtio_transport_common.c 
b/net/vmw_vsock/virtio_transport_common.c
index 957cdc01c8e8..900e5dca05f5 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -94,6 +94,9 @@ virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
 info->op,
 info->flags);

+   if (info->vsk)
+   skb_set_owner_w(skb, sk_vsock(info->vsk));
+
return skb;

out:
@@ -1294,6 +1297,8 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
goto free_pkt;
}

+   skb_set_owner_r(skb, sk);
+
vsk = vsock_sk(sk);

lock_sock(sk);


Can you explain why we are using skb_set_owner_w/skb_set_owner_r?

I'm a little concerned about 2 things:
- skb_set_owner_r() documentation says: "Stream and sequenced
  protocols can't normally use this as they need to fit buffers in
  and play with them."
- they increment sk_wmem_alloc and sk_rmem_alloc that we never used
  (IIRC)

For the long run, I think we should manage memory better, and using
socket accounting makes sense to me, but since we now have a different
system (which we have been carrying around since the introduction of
vsock), I think this change is a bit risky, especially as a fix.

So my suggestion is to use skb_set_owner_sk_safe() for now, unless I
missed something about why to use skb_set_owner_w/skb_set_owner_r.

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization