Re: [PATCH v2 05/13] vdpa: rewind at get_base, not set_base
在 2023/2/8 17:42, Eugenio Pérez 写道: At this moment it is only possible to migrate to a vdpa device running with x-svq=on. As a protective measure, the rewind of the inflight descriptors was done at the destination. That way if the source sent a virtqueue with inuse descriptors they are always discarded. Since this series allows to migrate also to passthrough devices with no SVQ, the right thing to do is to rewind at the source so the base of vrings are correct. Support for inflight descriptors may be added in the future. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Thanks --- hw/virtio/vhost-vdpa.c | 24 +--- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 26e38a6aab..d99db0bd03 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -1211,18 +1211,7 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) { struct vhost_vdpa *v = dev->opaque; -VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index); -/* - * vhost-vdpa devices does not support in-flight requests. Set all of them - * as available. - * - * TODO: This is ok for networking, but other kinds of devices might - * have problems with these retransmissions. - */ -while (virtqueue_rewind(vq, 1)) { -continue; -} if (v->shadow_vqs_enabled) { /* * Device vring base was set at device start. SVQ base is handled by @@ -1241,6 +1230,19 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, int ret; if (v->shadow_vqs_enabled) { +VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index); + +/* + * vhost-vdpa devices does not support in-flight requests. Set all of + * them as available. + * + * TODO: This is ok for networking, but other kinds of devices might + * have problems with these retransmissions. + */ +while (virtqueue_rewind(vq, 1)) { +continue; +} + ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index); return 0; } ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v2 04/13] vdpa: move vhost reset after get vring base
在 2023/2/8 17:42, Eugenio Pérez 写道: The function vhost.c:vhost_dev_stop calls vhost operation vhost_dev_start(false). In the case of vdpa it totally reset and wipes the device, making the fetching of the vring base (virtqueue state) totally useless. The kernel backend does not use vhost_dev_start vhost op callback, but vhost-user do. A patch to make vhost_user_dev_start more similar to vdpa is desirable, but it can be added on top. Signed-off-by: Eugenio Pérez --- include/hw/virtio/vhost-backend.h | 4 hw/virtio/vhost-vdpa.c| 22 -- hw/virtio/vhost.c | 3 +++ 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index c5ab49051e..ec3fbae58d 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -130,6 +130,9 @@ typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev); typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev, int fd); + +typedef void (*vhost_reset_status_op)(struct vhost_dev *dev); + typedef struct VhostOps { VhostBackendType backend_type; vhost_backend_init vhost_backend_init; @@ -177,6 +180,7 @@ typedef struct VhostOps { vhost_get_device_id_op vhost_get_device_id; vhost_force_iommu_op vhost_force_iommu; vhost_set_config_call_op vhost_set_config_call; +vhost_reset_status_op vhost_reset_status; } VhostOps; int vhost_backend_update_device_iotlb(struct vhost_dev *dev, diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index cbbe92ffe8..26e38a6aab 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -1152,14 +1152,23 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) if (started) { memory_listener_register(>listener, _space_memory); return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); -} else { -vhost_vdpa_reset_device(dev); -vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | - VIRTIO_CONFIG_S_DRIVER); -memory_listener_unregister(>listener); +} -return 0; +return 0; +} + +static void vhost_vdpa_reset_status(struct vhost_dev *dev) +{ +struct vhost_vdpa *v = dev->opaque; + +if (dev->vq_index + dev->nvqs != dev->vq_index_end) { +return; } + +vhost_vdpa_reset_device(dev); +vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | +VIRTIO_CONFIG_S_DRIVER); +memory_listener_unregister(>listener); } static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, @@ -1346,4 +1355,5 @@ const VhostOps vdpa_ops = { .vhost_vq_get_addr = vhost_vdpa_vq_get_addr, .vhost_force_iommu = vhost_vdpa_force_iommu, .vhost_set_config_call = vhost_vdpa_set_config_call, +.vhost_reset_status = vhost_vdpa_reset_status, }; diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index eb8c4c378c..a266396576 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -2049,6 +2049,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) hdev->vqs + i, hdev->vq_index + i); } +if (hdev->vhost_ops->vhost_reset_status) { +hdev->vhost_ops->vhost_reset_status(hdev); +} This looks racy, if we don't suspend/reset the device, device can move last_avail_idx even after get_vring_base()? Instead of doing things like this, should we fallback to virtio_queue_restore_last_avail_idx() in this case? Thanks if (vhost_dev_has_iommu(hdev)) { if (hdev->vhost_ops->vhost_set_iotlb_callback) { ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v2 03/13] vdpa: add vhost_vdpa_suspend
在 2023/2/21 13:27, Jason Wang 写道: 在 2023/2/8 17:42, Eugenio Pérez 写道: The function vhost.c:vhost_dev_stop fetches the vring base so the vq state can be migrated to other devices. However, this is unreliable in vdpa, since we didn't signal the device to suspend the queues, making the value fetched useless. Suspend the device if possible before fetching first and subsequent vring bases. Moreover, vdpa totally reset and wipes the device at the last device before fetch its vrings base, making that operation useless in the last device. This will be fixed in later patches of this series. It would be better not introduce a bug first and fix it in the following patch. Signed-off-by: Eugenio Pérez --- hw/virtio/vhost-vdpa.c | 19 +++ hw/virtio/trace-events | 1 + 2 files changed, 20 insertions(+) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 2e79fbe4b2..cbbe92ffe8 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -1108,6 +1108,24 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) } } +static void vhost_vdpa_suspend(struct vhost_dev *dev) +{ + struct vhost_vdpa *v = dev->opaque; + int r; + + if (!vhost_vdpa_first_dev(dev) || Any reason we need to use vhost_vdpa_first_dev() instead of replacing the if (started) { } else { vhost_vdpa_reset_device(dev); } Ok, I think I kind of understand, so I think we need re-order the patches, at least patch 4 should come before this patch? Thanks We check if (dev->vq_index + dev->nvqs != dev->vq_index_end) in vhost_vdpa_dev_start() but vhost_vdpa_first_dev() inside vhost_vdpa_suspend(). This will result code that is hard to maintain. Thanks + !(dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_SUSPEND))) { + return; + } + + trace_vhost_vdpa_suspend(dev); + r = ioctl(v->device_fd, VHOST_VDPA_SUSPEND); + if (unlikely(r)) { + error_report("Cannot suspend: %s(%d)", g_strerror(errno), errno); + /* Not aborting since we're called from stop context */ + } +} + static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) { struct vhost_vdpa *v = dev->opaque; @@ -1122,6 +1140,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) } vhost_vdpa_set_vring_ready(dev); } else { + vhost_vdpa_suspend(dev); vhost_vdpa_svqs_stop(dev); vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); } diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index a87c5f39a2..8f8d05cf9b 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -50,6 +50,7 @@ vhost_vdpa_set_vring_ready(void *dev) "dev: %p" vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s" vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32 vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32 +vhost_vdpa_suspend(void *dev) "dev: %p" vhost_vdpa_dev_start(void *dev, bool started) "dev: %p started: %d" vhost_vdpa_set_log_base(void *dev, uint64_t base, unsigned long long size, int refcnt, int fd, void *log) "dev: %p base: 0x%"PRIx64" size: %llu refcnt: %d fd: %d log: %p" vhost_vdpa_set_vring_addr(void *dev, unsigned int index, unsigned int flags, uint64_t desc_user_addr, uint64_t used_user_addr, uint64_t avail_user_addr, uint64_t log_guest_addr) "dev: %p index: %u flags: 0x%x desc_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" log_guest_addr: 0x%"PRIx64 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v2 03/13] vdpa: add vhost_vdpa_suspend
在 2023/2/8 17:42, Eugenio Pérez 写道: The function vhost.c:vhost_dev_stop fetches the vring base so the vq state can be migrated to other devices. However, this is unreliable in vdpa, since we didn't signal the device to suspend the queues, making the value fetched useless. Suspend the device if possible before fetching first and subsequent vring bases. Moreover, vdpa totally reset and wipes the device at the last device before fetch its vrings base, making that operation useless in the last device. This will be fixed in later patches of this series. It would be better not introduce a bug first and fix it in the following patch. Signed-off-by: Eugenio Pérez --- hw/virtio/vhost-vdpa.c | 19 +++ hw/virtio/trace-events | 1 + 2 files changed, 20 insertions(+) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 2e79fbe4b2..cbbe92ffe8 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -1108,6 +1108,24 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) } } +static void vhost_vdpa_suspend(struct vhost_dev *dev) +{ +struct vhost_vdpa *v = dev->opaque; +int r; + +if (!vhost_vdpa_first_dev(dev) || Any reason we need to use vhost_vdpa_first_dev() instead of replacing the if (started) { } else { vhost_vdpa_reset_device(dev); } We check if (dev->vq_index + dev->nvqs != dev->vq_index_end) in vhost_vdpa_dev_start() but vhost_vdpa_first_dev() inside vhost_vdpa_suspend(). This will result code that is hard to maintain. Thanks +!(dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_SUSPEND))) { +return; +} + +trace_vhost_vdpa_suspend(dev); +r = ioctl(v->device_fd, VHOST_VDPA_SUSPEND); +if (unlikely(r)) { +error_report("Cannot suspend: %s(%d)", g_strerror(errno), errno); +/* Not aborting since we're called from stop context */ +} +} + static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) { struct vhost_vdpa *v = dev->opaque; @@ -1122,6 +1140,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) } vhost_vdpa_set_vring_ready(dev); } else { +vhost_vdpa_suspend(dev); vhost_vdpa_svqs_stop(dev); vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); } diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index a87c5f39a2..8f8d05cf9b 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -50,6 +50,7 @@ vhost_vdpa_set_vring_ready(void *dev) "dev: %p" vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s" vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32 vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32 +vhost_vdpa_suspend(void *dev) "dev: %p" vhost_vdpa_dev_start(void *dev, bool started) "dev: %p started: %d" vhost_vdpa_set_log_base(void *dev, uint64_t base, unsigned long long size, int refcnt, int fd, void *log) "dev: %p base: 0x%"PRIx64" size: %llu refcnt: %d fd: %d log: %p" vhost_vdpa_set_vring_addr(void *dev, unsigned int index, unsigned int flags, uint64_t desc_user_addr, uint64_t used_user_addr, uint64_t avail_user_addr, uint64_t log_guest_addr) "dev: %p index: %u flags: 0x%x desc_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" log_guest_addr: 0x%"PRIx64 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [patch net-next] net: virtio_net: implement exact header length guest feature
在 2023/2/20 21:56, Jiri Pirko 写道: Mon, Feb 20, 2023 at 01:55:33PM CET, m...@redhat.com wrote: On Mon, Feb 20, 2023 at 09:35:00AM +0100, Jiri Pirko wrote: Fri, Feb 17, 2023 at 02:47:36PM CET, m...@redhat.com wrote: On Fri, Feb 17, 2023 at 01:53:55PM +0100, Jiri Pirko wrote: Fri, Feb 17, 2023 at 01:22:01PM CET, m...@redhat.com wrote: On Fri, Feb 17, 2023 at 01:15:47PM +0100, Jiri Pirko wrote: From: Jiri Pirko virtio_net_hdr_from_skb() fills up hdr_len to skb_headlen(skb). Virtio spec introduced a feature VIRTIO_NET_F_GUEST_HDRLEN which when set implicates that the driver provides the exact size of the header. The driver already complies to fill the correct value. Introduce the feature and advertise it. Signed-off-by: Jiri Pirko Could you add a bit of motivation just for the record? Does this improve performance for some card? By how much? Expected to help some future card? I can get that info, but isn't that rather something to be appended to the virtio-spec patch? I mean, the feature is there, this is just implementing it in one driver. It is more like using it in the driver. It's not like we have to use everything - it could be useful for e.g. dpdk but not linux. Implementing it in the Linux driver has support costs - for example what if there's a bug and sometimes the length is incorrect? We'll be breaking things. I understand. To my understanding this feature just fixes the original ambiguity in the virtio spec. Quoting the original virtio spec: "hdr_len is a hint to the device as to how much of the header needs to be kept to copy into each packet" "a hint" might not be clear for the reader what does it mean, if it is "maybe like that" of "exactly like that". This feature just makes it crystal clear. If you look at the tap implementation, it uses hdr_len to alloc skb linear part. No hint, it counts with the provided value. So if the driver is currently not precise, it breaks tap. Well that's only for gso though right? Yep. And making it bigger than necessary works fine ... Well yeah. But tap does not do that, does it? it uses hdr_len directly. tap_get_user() limit the head length: static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, struct iov_iter *from, int noblock) { int good_linear = SKB_MAX_HEAD(TAP_RESERVE); ... I will add this to the patch description and send v2. I feel this does not answer the question yet, or maybe I am being dense. My point was not about making hdr_len precise. My point was that we are making a change here for no apparent reason. I am guessing you are not doing it for fun - so why? Is there a device with this feature bit you are aware of? Afaik real hw which does emulation of virtio_net would benefit from that, our hw including. Note that driver can choose to no negotiate this feature, so malicious drivers can still try to use illegal value. Thanks The patch was submitted by Marvell but they never bothered with using it in Linux. I guess they are using it for something else? CC Vitaly who put this in. thanks! --- drivers/net/virtio_net.c| 6 -- include/uapi/linux/virtio_net.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fb5e68ed3ec2..e85b03988733 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -62,7 +62,8 @@ static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_USO4, - VIRTIO_NET_F_GUEST_USO6 + VIRTIO_NET_F_GUEST_USO6, + VIRTIO_NET_F_GUEST_HDRLEN }; #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ @@ -4213,7 +4214,8 @@ static struct virtio_device_id id_table[] = { VIRTIO_NET_F_CTRL_MAC_ADDR, \ VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ - VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ + VIRTIO_NET_F_GUEST_HDRLEN static unsigned int features[] = { VIRTNET_FEATURES, diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index b4062bed186a..12c1c9699935 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -61,6 +61,7 @@ #define VIRTIO_NET_F_GUEST_USO6 55 /* Guest can handle USOv6 in. */ #define VIRTIO_NET_F_HOST_USO 56 /* Host can handle USO in. */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ +#define VIRTIO_NET_F_GUEST_HDRLEN 59 /* Guest provides the exact hdr_len value. */ #define VIRTIO_NET_F_RSS60/* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT61/* extended coalescing info */ #define VIRTIO_NET_F_STANDBY62/* Act as standby for another device -- 2.39.0
Re: [patch net-next] net: virtio_net: implement exact header length guest feature
On Mon, Feb 20, 2023 at 8:55 PM Michael S. Tsirkin wrote: > > On Mon, Feb 20, 2023 at 09:35:00AM +0100, Jiri Pirko wrote: > > Fri, Feb 17, 2023 at 02:47:36PM CET, m...@redhat.com wrote: > > >On Fri, Feb 17, 2023 at 01:53:55PM +0100, Jiri Pirko wrote: > > >> Fri, Feb 17, 2023 at 01:22:01PM CET, m...@redhat.com wrote: > > >> >On Fri, Feb 17, 2023 at 01:15:47PM +0100, Jiri Pirko wrote: > > >> >> From: Jiri Pirko > > >> >> > > >> >> virtio_net_hdr_from_skb() fills up hdr_len to skb_headlen(skb). > > >> >> > > >> >> Virtio spec introduced a feature VIRTIO_NET_F_GUEST_HDRLEN which when > > >> >> set implicates that the driver provides the exact size of the header. > > >> >> > > >> >> The driver already complies to fill the correct value. Introduce the > > >> >> feature and advertise it. > > >> >> > > >> >> Signed-off-by: Jiri Pirko > > >> > > > >> >Could you add a bit of motivation just for the record? > > >> >Does this improve performance for some card? By how much? > > >> >Expected to help some future card? > > >> > > >> I can get that info, but isn't that rather something to be appended to > > >> the virtio-spec patch? I mean, the feature is there, this is just > > >> implementing it in one driver. > > > > > >It is more like using it in the driver. It's not like we have to use > > >everything - it could be useful for e.g. dpdk but not linux. > > >Implementing it in the Linux driver has support costs - for example what > > >if there's a bug and sometimes the length is incorrect? > > >We'll be breaking things. > > > > I understand. To my understanding this feature just fixes the original > > ambiguity in the virtio spec. > > > > Quoting the original virtio spec: > > "hdr_len is a hint to the device as to how much of the header needs to > > be kept to copy into each packet" > > > > "a hint" might not be clear for the reader what does it mean, if it is > > "maybe like that" of "exactly like that". This feature just makes it > > crystal clear. > > > > If you look at the tap implementation, it uses hdr_len to alloc > > skb linear part. No hint, it counts with the provided value. > > So if the driver is currently not precise, it breaks tap. > > Well that's only for gso though right? > And making it bigger than necessary works fine ... It should work otherwise it's a bug after this commit: commit 96f8d9ecf227638c89f98ccdcdd50b569891976c Author: Jason Wang Date: Wed Nov 13 14:00:39 2013 +0800 tuntap: limit head length of skb allocated Thanks > > > I will add this to the patch description and send v2. > > > > I feel this does not answer the question yet, or maybe I am being dense. > My point was not about making hdr_len precise. My point was that we are > making a change here for no apparent reason. I am guessing you are not > doing it for fun - so why? Is there a device with this feature bit > you are aware of? > > > > > > > > > > >The patch was submitted by Marvell but they never bothered with > > >using it in Linux. I guess they are using it for something else? > > >CC Vitaly who put this in. > > > > > >> > > >> > > > >> >thanks! > > >> > > > >> > > > >> >> --- > > >> >> drivers/net/virtio_net.c| 6 -- > > >> >> include/uapi/linux/virtio_net.h | 1 + > > >> >> 2 files changed, 5 insertions(+), 2 deletions(-) > > >> >> > > >> >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > > >> >> index fb5e68ed3ec2..e85b03988733 100644 > > >> >> --- a/drivers/net/virtio_net.c > > >> >> +++ b/drivers/net/virtio_net.c > > >> >> @@ -62,7 +62,8 @@ static const unsigned long guest_offloads[] = { > > >> >> VIRTIO_NET_F_GUEST_UFO, > > >> >> VIRTIO_NET_F_GUEST_CSUM, > > >> >> VIRTIO_NET_F_GUEST_USO4, > > >> >> - VIRTIO_NET_F_GUEST_USO6 > > >> >> + VIRTIO_NET_F_GUEST_USO6, > > >> >> + VIRTIO_NET_F_GUEST_HDRLEN > > >> >> }; > > >> >> > > >> >> #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) > > >> >> | \ > > >> >> @@ -4213,7 +4214,8 @@ static struct virtio_device_id id_table[] = { > > >> >> VIRTIO_NET_F_CTRL_MAC_ADDR, \ > > >> >> VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ > > >> >> VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ > > >> >> - VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, > > >> >> VIRTIO_NET_F_NOTF_COAL > > >> >> + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, > > >> >> VIRTIO_NET_F_NOTF_COAL, \ > > >> >> + VIRTIO_NET_F_GUEST_HDRLEN > > >> >> > > >> >> static unsigned int features[] = { > > >> >> VIRTNET_FEATURES, > > >> >> diff --git a/include/uapi/linux/virtio_net.h > > >> >> b/include/uapi/linux/virtio_net.h > > >> >> index b4062bed186a..12c1c9699935 100644 > > >> >> --- a/include/uapi/linux/virtio_net.h > > >> >> +++ b/include/uapi/linux/virtio_net.h > > >> >> @@ -61,6 +61,7 @@ > > >> >> #define VIRTIO_NET_F_GUEST_USO655 /* Guest can handle > > >> >> USOv6 in. */ > > >> >> #define VIRTIO_NET_F_HOST_USO 56 /* Host can
Re: [PATCH vhost 08/10] virtio_ring: introduce dma sync api for virtio
On Mon, Feb 20, 2023 at 3:05 PM Xuan Zhuo wrote: > > On Mon, 20 Feb 2023 13:38:20 +0800, Jason Wang wrote: > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo > > wrote: > > > > > > These API has been introduced: > > > > > > * virtio_dma_need_sync > > > * virtio_dma_sync_single_range_for_cpu > > > * virtio_dma_sync_single_range_for_device > > > > What's the advantages of exporting internal logic like > > virtio_dma_need_sync() over hiding it in > > virtio_dma_sync_single_range_for_cpu() and > > virtio_dma_sync_single_range_for_device()? > > Sorry, I didn't understand it. I meant: virtio_dma_sync_single_range_for_cpu() { if (!virtio_dma_need_sync()) return; .. } Thanks > > Thanks. > > > > > Thanks > > > > > > > > > > These APIs can be used together with the premapped mechanism to sync the > > > DMA address. > > > > > > Signed-off-by: Xuan Zhuo > > > --- > > > drivers/virtio/virtio_ring.c | 70 > > > include/linux/virtio.h | 8 + > > > 2 files changed, 78 insertions(+) > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > > > index 855338609c7f..84129b8c3e2a 100644 > > > --- a/drivers/virtio/virtio_ring.c > > > +++ b/drivers/virtio/virtio_ring.c > > > @@ -3264,4 +3264,74 @@ void virtio_dma_unmap(struct device *dev, > > > dma_addr_t dma, unsigned int length, > > > } > > > EXPORT_SYMBOL_GPL(virtio_dma_unmap); > > > > > > +/** > > > + * virtio_dma_need_sync - check a dma address needs sync > > > + * @dev: virtio device > > > + * @addr: DMA address > > > + * > > > + * This API is only for pre-mapped buffers, for non premapped buffers > > > virtio > > > + * core handles DMA API internally. > > > + */ > > > +bool virtio_dma_need_sync(struct device *dev, dma_addr_t addr) > > > +{ > > > + struct virtio_device *vdev = dev_to_virtio(dev); > > > + > > > + if (!vring_use_dma_api(vdev)) > > > + return 0; > > > + > > > + return dma_need_sync(vdev->dev.parent, addr); > > > +} > > > +EXPORT_SYMBOL_GPL(virtio_dma_need_sync); > > > + > > > +/** > > > + * virtio_dma_sync_single_range_for_cpu - dma sync for cpu > > > + * @dev: virtio device > > > + * @addr: DMA address > > > + * @offset: DMA address offset > > > + * @size: mem size for sync > > > + * @dir: DMA direction > > > + * > > > + * Before calling this function, use virtio_dma_need_sync() to confirm > > > that the > > > + * DMA address really needs to be synchronized > > > + * > > > + * This API is only for pre-mapped buffers, for non premapped buffers > > > virtio > > > + * core handles DMA API internally. > > > + */ > > > +void virtio_dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t > > > addr, > > > + unsigned long offset, size_t > > > size, > > > + enum dma_data_direction dir) > > > +{ > > > + struct virtio_device *vdev = dev_to_virtio(dev); > > > + > > > + dma_sync_single_range_for_cpu(vdev->dev.parent, addr, offset, > > > + size, DMA_BIDIRECTIONAL); > > > +} > > > +EXPORT_SYMBOL_GPL(virtio_dma_sync_single_range_for_cpu); > > > + > > > +/** > > > + * virtio_dma_sync_single_range_for_device - dma sync for device > > > + * @dev: virtio device > > > + * @addr: DMA address > > > + * @offset: DMA address offset > > > + * @size: mem size for sync > > > + * @dir: DMA direction > > > + * > > > + * Before calling this function, use virtio_dma_need_sync() to confirm > > > that the > > > + * DMA address really needs to be synchronized > > > + * > > > + * This API is only for pre-mapped buffers, for non premapped buffers > > > virtio > > > + * core handles DMA API internally. > > > + */ > > > +void virtio_dma_sync_single_range_for_device(struct device *dev, > > > +dma_addr_t addr, > > > +unsigned long offset, size_t > > > size, > > > +enum dma_data_direction dir) > > > +{ > > > + struct virtio_device *vdev = dev_to_virtio(dev); > > > + > > > + dma_sync_single_range_for_device(vdev->dev.parent, addr, offset, > > > +size, DMA_BIDIRECTIONAL); > > > +} > > > +EXPORT_SYMBOL_GPL(virtio_dma_sync_single_range_for_device); > > > + > > > MODULE_LICENSE("GPL"); > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h > > > index b5fa71476737..d0e707d744a0 100644 > > > --- a/include/linux/virtio.h > > > +++ b/include/linux/virtio.h > > > @@ -225,4 +225,12 @@ dma_addr_t virtio_dma_map(struct device *dev, void > > > *addr, unsigned int length, > > > int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr); > > > void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int > > > length, > > > enum dma_data_direction dir); > > > +bool virtio_dma_need_sync(struct device *dev,
Re: [PATCH vhost 10/10] virtio_ring: introduce virtqueue_reset()
On Mon, Feb 20, 2023 at 3:04 PM Xuan Zhuo wrote: > > On Mon, 20 Feb 2023 13:38:30 +0800, Jason Wang wrote: > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo > > wrote: > > > > > > Introduce virtqueue_reset() to release all buffer inside vq. > > > > > > Signed-off-by: Xuan Zhuo > > > --- > > > drivers/virtio/virtio_ring.c | 50 > > > include/linux/virtio.h | 2 ++ > > > 2 files changed, 52 insertions(+) > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > > > index 2ba60a14f557..2750a365439a 100644 > > > --- a/drivers/virtio/virtio_ring.c > > > +++ b/drivers/virtio/virtio_ring.c > > > @@ -2930,6 +2930,56 @@ int virtqueue_resize(struct virtqueue *_vq, u32 > > > num, > > > } > > > EXPORT_SYMBOL_GPL(virtqueue_resize); > > > > > > +/** > > > + * virtqueue_reset - detach and recycle all unused buffers > > > + * @_vq: the struct virtqueue we're talking about. > > > + * @recycle: callback to recycle unused buffers > > > + * > > > + * Caller must ensure we don't call this with other virtqueue operations > > > + * at the same time (except where noted). > > > + * > > > + * Returns zero or a negative error. > > > + * 0: success. > > > + * -EBUSY: Failed to sync with device, vq may not work properly > > > + * -ENOENT: Transport or device not supported > > > + * -EPERM: Operation not permitted > > > + */ > > > +int virtqueue_reset(struct virtqueue *_vq, > > > + void (*recycle)(struct virtqueue *vq, void *buf)) > > > +{ > > > + struct vring_virtqueue *vq = to_vvq(_vq); > > > + struct virtio_device *vdev = vq->vq.vdev; > > > + void *buf; > > > + int err; > > > + > > > + if (!vq->we_own_ring) > > > + return -EPERM; > > > + > > > + if (!vdev->config->disable_vq_and_reset) > > > + return -ENOENT; > > > + > > > + if (!vdev->config->enable_vq_after_reset) > > > + return -ENOENT; > > > + > > > + err = vdev->config->disable_vq_and_reset(_vq); > > > + if (err) > > > + return err; > > > + > > > + while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) > > > + recycle(_vq, buf); > > > + > > > + if (vq->packed_ring) > > > + virtqueue_reinit_packed(vq); > > > + else > > > + virtqueue_reinit_split(vq); > > > + > > > + if (vdev->config->enable_vq_after_reset(_vq)) > > > + return -EBUSY; > > > + > > > + return 0; > > > +} > > > > I don't get why not factor the similar logic from virtqueue_resize()? > > > I can do this, if you prefer this. > > THanks. Please do that, reset is a step of resize if I understand correctly. Thanks > > > > > > > Thanks > > > > > > > +EXPORT_SYMBOL_GPL(virtqueue_reset); > > > + > > > /* Only available for split ring */ > > > struct virtqueue *vring_new_virtqueue(unsigned int index, > > > unsigned int num, > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h > > > index d0e707d744a0..cf4c157e4e75 100644 > > > --- a/include/linux/virtio.h > > > +++ b/include/linux/virtio.h > > > @@ -106,6 +106,8 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue > > > *vq); > > > > > > int virtqueue_resize(struct virtqueue *vq, u32 num, > > > void (*recycle)(struct virtqueue *vq, void *buf)); > > > +int virtqueue_reset(struct virtqueue *vq, > > > + void (*recycle)(struct virtqueue *vq, void *buf)); > > > > > > /** > > > * struct virtio_device - representation of a device using virtio > > > -- > > > 2.32.0.3.g01195cf9f > > > > > > ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH vhost 07/10] virtio_ring: add api virtio_dma_map() for advance dma
On Mon, Feb 20, 2023 at 3:02 PM Xuan Zhuo wrote: > > On Mon, 20 Feb 2023 13:38:24 +0800, Jason Wang wrote: > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo > > wrote: > > > > > > Added virtio_dma_map() to map DMA addresses for virtual memory in > > > advance. The purpose is to keep memory mapped across multiple add/get > > > buf operations. > > > > I wonder if instead of exporting helpers like this, it might be simple > > to just export dma_dev then the upper layer can use DMA API at will? > > > The reason for not doing this, Virtio is not just using DMA_DEV to mapp, but > also check whether DMA is used. We should let the DMA API decide by exporting a correct dma_dev. E.g when ACCESS_PLATFORM is not negotiated, advertising a DMA dev without dma_ops. Thanks > > > > > > (Otherwise the DMA helpers need to grow/shrink as the DMA API evolves?) > > > > > > > > Added virtio_dma_unmap() for unmap DMA address. > > > > > > Signed-off-by: Xuan Zhuo > > > --- > > > drivers/virtio/virtio_ring.c | 92 > > > include/linux/virtio.h | 9 > > > 2 files changed, 101 insertions(+) > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > > > index cd9364eb2345..855338609c7f 100644 > > > --- a/drivers/virtio/virtio_ring.c > > > +++ b/drivers/virtio/virtio_ring.c > > > @@ -3172,4 +3172,96 @@ const struct vring *virtqueue_get_vring(struct > > > virtqueue *vq) > > > } > > > EXPORT_SYMBOL_GPL(virtqueue_get_vring); > > > > > > +/** > > > + * virtio_dma_map_page - get the DMA addr of the memory for virtio device > > > + * @dev: virtio device > > > + * @page: the page of the memory to DMA > > > + * @offset: the offset of the memory inside page > > > + * @length: memory length > > > + * @dir: DMA direction > > > + * > > > + * This API is only for pre-mapped buffers, for non premapped buffers > > > virtio > > > + * core handles DMA API internally. > > > + * > > > + * Returns the DMA addr. DMA_MAPPING_ERROR means error. > > > + */ > > > +dma_addr_t virtio_dma_map_page(struct device *dev, struct page *page, > > > size_t offset, > > > + unsigned int length, enum > > > dma_data_direction dir) > > > +{ > > > > This (and the reset) needs to be done per virtqueue instead per device > > after b0e504e5505d184b0be248b7dcdbe50b79f03758 ("virtio_ring: per > > virtqueue dma device"). > > > YES. > > > > > > > + struct virtio_device *vdev = dev_to_virtio(dev); > > > + > > > + if (!vring_use_dma_api(vdev)) > > > + return page_to_phys(page) + offset; > > > + > > > + return dma_map_page(vdev->dev.parent, page, offset, length, dir); > > > +} > > > > Need either inline or EXPORT_SYMBOL_GPL() here. > > Because I did not use this interface, I did not export it. > > Thanks. > > > > > > Thanks > > > > > > > + > > > +/** > > > + * virtio_dma_map - get the DMA addr of the memory for virtio device > > > + * @dev: virtio device > > > + * @addr: the addr to DMA > > > + * @length: memory length > > > + * @dir: DMA direction > > > + * > > > + * This API is only for pre-mapped buffers, for non premapped buffers > > > virtio > > > + * core handles DMA API internally. > > > + * > > > + * Returns the DMA addr. > > > + */ > > > +dma_addr_t virtio_dma_map(struct device *dev, void *addr, unsigned int > > > length, > > > + enum dma_data_direction dir) > > > +{ > > > + struct page *page; > > > + size_t offset; > > > + > > > + page = virt_to_page(addr); > > > + offset = offset_in_page(addr); > > > + > > > + return virtio_dma_map_page(dev, page, offset, length, dir); > > > +} > > > +EXPORT_SYMBOL_GPL(virtio_dma_map); > > > + > > > +/** > > > + * virtio_dma_mapping_error - check dma address > > > + * @dev: virtio device > > > + * @addr: DMA address > > > + * > > > + * This API is only for pre-mapped buffers, for non premapped buffers > > > virtio > > > + * core handles DMA API internally. > > > + * > > > + * Returns 0 means dma valid. Other means invalid dma address. > > > + */ > > > +int virtio_dma_mapping_error(struct device *dev, dma_addr_t addr) > > > +{ > > > + struct virtio_device *vdev = dev_to_virtio(dev); > > > + > > > + if (!vring_use_dma_api(vdev)) > > > + return 0; > > > + > > > + return dma_mapping_error(vdev->dev.parent, addr); > > > +} > > > +EXPORT_SYMBOL_GPL(virtio_dma_mapping_error); > > > + > > > +/** > > > + * virtio_dma_unmap - unmap DMA addr > > > + * @dev: virtio device > > > + * @dma: DMA address > > > + * @length: memory length > > > + * @dir: DMA direction > > > + * > > > + * This API is only for pre-mapped buffers, for non premapped buffers > > > virtio > > > + * core handles DMA API internally. > > > + */ > > > +void virtio_dma_unmap(struct device *dev, dma_addr_t dma, unsigned int > > > length, > > > + enum dma_data_direction dir) > > > +{ > > > + struct virtio_device *vdev =
Re: [PATCH vhost 04/10] virtio_ring: split: introduce virtqueue_add_split_premapped()
On Mon, Feb 20, 2023 at 2:56 PM Xuan Zhuo wrote: > > On Mon, 20 Feb 2023 13:38:13 +0800, Jason Wang wrote: > > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo > > wrote: > > > > > > virtqueue_add_split() only supports virtual addresses, dma is completed > > > in virtqueue_add_split(). > > > > > > In some scenarios (such as the AF_XDP scenario), the memory is allocated > > > and DMA is completed in advance, so it is necessary for us to support > > > passing the DMA address to virtio core. > > > > > > Signed-off-by: Xuan Zhuo > > > --- > > > drivers/virtio/virtio_ring.c | 100 +-- > > > include/linux/virtio.h | 5 ++ > > > 2 files changed, 100 insertions(+), 5 deletions(-) > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > > > index 47b6f9152f9f..a31155abe101 100644 > > > --- a/drivers/virtio/virtio_ring.c > > > +++ b/drivers/virtio/virtio_ring.c > > > @@ -70,6 +70,7 @@ > > > struct vring_desc_state_split { > > > void *data; /* Data for callback. */ > > > struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ > > > + bool premapped; > > > > Better with a comment. > > > > Not native speaker, but "dma_addr" might be better? > > > > > }; > > > > > > struct vring_desc_state_packed { > > > @@ -440,7 +441,7 @@ static void vring_unmap_one_split_indirect(const > > > struct vring_virtqueue *vq, > > > } > > > > > > static unsigned int vring_unmap_one_split(const struct vring_virtqueue > > > *vq, > > > - unsigned int i) > > > + unsigned int i, bool premapped) > > > { > > > struct vring_desc_extra *extra = vq->split.desc_extra; > > > u16 flags; > > > @@ -457,6 +458,9 @@ static unsigned int vring_unmap_one_split(const > > > struct vring_virtqueue *vq, > > > (flags & VRING_DESC_F_WRITE) ? > > > DMA_FROM_DEVICE : DMA_TO_DEVICE); > > > } else { > > > + if (premapped) > > > + goto out; > > > + > > > dma_unmap_page(vring_dma_dev(vq), > > >extra[i].addr, > > >extra[i].len, > > > @@ -788,6 +792,47 @@ static inline int virtqueue_add_split(struct > > > virtqueue *_vq, > > > return err; > > > } > > > > > > +static inline int virtqueue_add_split_premapped(struct virtqueue *_vq, > > > + struct scatterlist *sgs[], > > > + unsigned int total_sg, > > > + unsigned int out_sgs, > > > + unsigned int in_sgs, > > > + void *data, > > > + void *ctx, > > > + gfp_t gfp) > > > +{ > > > + struct vring_virtqueue *vq = to_vvq(_vq); > > > + struct vring_desc *desc; > > > + int head; > > > + int err; > > > + > > > + START_USE(vq); > > > + > > > + /* check vq state and try to alloc desc for indirect. */ > > > + err = virtqueue_add_split_prepare(vq, total_sg, out_sgs, data, > > > ctx, gfp, ); > > > + if (err) > > > + goto end; > > > + > > > + head = vq->free_head; > > > + err = virtqueue_add_split_vring(vq, sgs, total_sg, out_sgs, > > > in_sgs, desc); > > > + if (err) > > > + goto err; > > > + > > > + /* Store token and indirect buffer state. */ > > > + vq->split.desc_state[head].data = data; > > > + vq->split.desc_state[head].indir_desc = desc ? desc : ctx; > > > + vq->split.desc_state[head].premapped = true; > > > > This function duplicates most of the logic of virtqueue_add_split() > > let's unify it. > > I want to know that the __virtqueue_add_split is the original > virtqueue_add_split or my refactor virtqueue_add_split? It's basically just the virtqueue_add_split_premapped() but with a boolean to say if it is premapped. > > > > > probably: > > > > __virtqueue_add_split(..., bool premapped); > > virtqueue_add_split() > > { > > __virtqueue_add_split(..., false); > > } > > > > virtqueue_add_split_premapped() > > { > >__virtqueue_add_split(..., true); > > } > > I am trying to reduce the inspection of premapped. > > In fact, this is Michael's request, although I am not particularly sure that > my > implementation has met his requirements. > > https://lore.kernel.org/all/20230203041006-mutt-send-email-...@kernel.org/ I think there should be no conflict, the use of premapped was limited to the above two functions? Thanks > > Thanks. > > > > > > ? > > > > And so did for packed (patch 5). > > > > Thanks > > > > > > > > > + > > > + goto end; > > > + > > > +err: > > > +
[GIT PULL] virtio,vhost,vdpa: features, fixes
The following changes since commit ceaa837f96adb69c0df0397937cd74991d5d821a: Linux 6.2-rc8 (2023-02-12 14:10:17 -0800) are available in the Git repository at: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus for you to fetch changes up to deeacf35c922da579637f5db625af20baafc66ed: vdpa/mlx5: support device features provisioning (2023-02-20 19:27:00 -0500) Note: dropped a patch close to the bottom of the stack at the last minute so the commits differ but all of these have been in next already. The dropped patch just added a new query ioctl so not interacting with anything else in the pull. virtio,vhost,vdpa: features, fixes device feature provisioning in ifcvf, mlx5 new SolidNET driver support for zoned block device in virtio blk numa support in virtio pmem VIRTIO_F_RING_RESET support in vhost-net more debugfs entries in mlx5 resume support in vdpa completion batching in virtio blk cleanup of dma api use in vdpa now simulating more features in vdpa-sim documentation, features, fixes all over the place Signed-off-by: Michael S. Tsirkin Alvaro Karsz (4): PCI: Add SolidRun vendor ID PCI: Avoid FLR for SolidRun SNET DPU rev 1 virtio: vdpa: new SolidNET DPU driver. vhost-vdpa: print warning when vhost_vdpa_alloc_domain fails Bagas Sanjaya (3): docs: driver-api: virtio: parenthesize external reference targets docs: driver-api: virtio: slightly reword virtqueues allocation paragraph docs: driver-api: virtio: commentize spec version checking Bo Liu (1): vhost-scsi: convert sysfs snprintf and sprintf to sysfs_emit Colin Ian King (1): vdpa: Fix a couple of spelling mistakes in some messages Dmitry Fomichev (1): virtio-blk: add support for zoned block devices Eli Cohen (6): vdpa/mlx5: Move some definitions to a new header file vdpa/mlx5: Add debugfs subtree vdpa/mlx5: Add RX counters to debugfs vdpa/mlx5: Directly assign memory key vdpa/mlx5: Don't clear mr struct on destroy MR vdpa/mlx5: Initialize CVQ iotlb spinlock Eugenio Pérez (2): vdpa_sim: not reset state in vdpasim_queue_ready vdpa_sim_net: Offer VIRTIO_NET_F_STATUS Jason Wang (11): vdpa_sim: use weak barriers vdpa_sim: switch to use __vdpa_alloc_device() vdpasim: customize allocation size vdpa_sim: support vendor statistics vdpa_sim_net: vendor satistics vdpa_sim: get rid of DMA ops virtio_ring: per virtqueue dma device vdpa: introduce get_vq_dma_device() virtio-vdpa: support per vq dma device vdpa: set dma mask for vDPA device vdpa: mlx5: support per virtqueue dma device Kangjie Xu (1): vhost-net: support VIRTIO_F_RING_RESET Liming Wu (2): vhost-test: remove meaningless debug info vhost: remove unused paramete Michael S. Tsirkin (3): virtio_blk: temporary variable type tweak virtio_blk: zone append in header type tweak virtio_blk: mark all zone fields LE Michael Sammler (1): virtio_pmem: populate numa information Ricardo Cañuelo (1): docs: driver-api: virtio: virtio on Linux Sebastien Boeuf (4): vdpa: Add resume operation vhost-vdpa: Introduce RESUME backend feature bit vhost-vdpa: uAPI to resume the device vdpa_sim: Implement resume vdpa op Shunsuke Mie (2): vringh: fix a typo in comments for vringh_kiov tools/virtio: enable to build with retpoline Si-Wei Liu (6): vdpa: fix improper error message when adding vdpa dev vdpa: conditionally read STATUS in config space vdpa: validate provisioned device features against specified attribute vdpa: validate device feature provisioning against supported class vdpa/mlx5: make MTU/STATUS presence conditional on feature bits vdpa/mlx5: support device features provisioning Suwan Kim (2): virtio-blk: set req->state to MQ_RQ_COMPLETE after polling I/O is finished virtio-blk: support completion batching for the IRQ path Zheng Wang (1): scsi: virtio_scsi: fix handling of kmalloc failure Zhu Lingshan (12): vDPA/ifcvf: decouple hw features manipulators from the adapter vDPA/ifcvf: decouple config space ops from the adapter vDPA/ifcvf: alloc the mgmt_dev before the adapter vDPA/ifcvf: decouple vq IRQ releasers from the adapter vDPA/ifcvf: decouple config IRQ releaser from the adapter vDPA/ifcvf: decouple vq irq requester from the adapter vDPA/ifcvf: decouple config/dev IRQ requester and vectors allocator from the adapter vDPA/ifcvf: ifcvf_request_irq works on ifcvf_hw vDPA/ifcvf: manage ifcvf_hw in the mgmt_dev vDPA/ifcvf: allocate the adapter in dev_add() vDPA/ifcvf: retire ifcvf_private_to_vf vDPA/ifcvf: implement features provisioning
Re: [patch net-next] net: virtio_net: implement exact header length guest feature
On Mon, Feb 20, 2023 at 02:56:08PM +0100, Jiri Pirko wrote: > Mon, Feb 20, 2023 at 01:55:33PM CET, m...@redhat.com wrote: > >On Mon, Feb 20, 2023 at 09:35:00AM +0100, Jiri Pirko wrote: > >> Fri, Feb 17, 2023 at 02:47:36PM CET, m...@redhat.com wrote: > >> >On Fri, Feb 17, 2023 at 01:53:55PM +0100, Jiri Pirko wrote: > >> >> Fri, Feb 17, 2023 at 01:22:01PM CET, m...@redhat.com wrote: > >> >> >On Fri, Feb 17, 2023 at 01:15:47PM +0100, Jiri Pirko wrote: > >> >> >> From: Jiri Pirko > >> >> >> > >> >> >> virtio_net_hdr_from_skb() fills up hdr_len to skb_headlen(skb). > >> >> >> > >> >> >> Virtio spec introduced a feature VIRTIO_NET_F_GUEST_HDRLEN which when > >> >> >> set implicates that the driver provides the exact size of the header. > >> >> >> > >> >> >> The driver already complies to fill the correct value. Introduce the > >> >> >> feature and advertise it. > >> >> >> > >> >> >> Signed-off-by: Jiri Pirko > >> >> > > >> >> >Could you add a bit of motivation just for the record? > >> >> >Does this improve performance for some card? By how much? > >> >> >Expected to help some future card? > >> >> > >> >> I can get that info, but isn't that rather something to be appended to > >> >> the virtio-spec patch? I mean, the feature is there, this is just > >> >> implementing it in one driver. > >> > > >> >It is more like using it in the driver. It's not like we have to use > >> >everything - it could be useful for e.g. dpdk but not linux. > >> >Implementing it in the Linux driver has support costs - for example what > >> >if there's a bug and sometimes the length is incorrect? > >> >We'll be breaking things. > >> > >> I understand. To my understanding this feature just fixes the original > >> ambiguity in the virtio spec. > >> > >> Quoting the original virtio spec: > >> "hdr_len is a hint to the device as to how much of the header needs to > >> be kept to copy into each packet" > >> > >> "a hint" might not be clear for the reader what does it mean, if it is > >> "maybe like that" of "exactly like that". This feature just makes it > >> crystal clear. > >> > >> If you look at the tap implementation, it uses hdr_len to alloc > >> skb linear part. No hint, it counts with the provided value. > >> So if the driver is currently not precise, it breaks tap. > > > >Well that's only for gso though right? > > Yep. > > > >And making it bigger than necessary works fine ... > > Well yeah. But tap does not do that, does it? it uses hdr_len directly. > I mean if hdr_len is bigger than necessary tap does work. > > > >> I will add this to the patch description and send v2. > >> > > > >I feel this does not answer the question yet, or maybe I am being dense. > >My point was not about making hdr_len precise. My point was that we are > >making a change here for no apparent reason. I am guessing you are not > >doing it for fun - so why? Is there a device with this feature bit > >you are aware of? > > Afaik real hw which does emulation of virtio_net would benefit from > that, our hw including. OK so do you have hardware which exposes this feature? That is the bit I am missing. Maybe mention the make in the commit log so we know where to turn if we need to make changes here? Or "under development" if it is not on the market yet. > > > > > > > > >> > >> > > >> >The patch was submitted by Marvell but they never bothered with > >> >using it in Linux. I guess they are using it for something else? > >> >CC Vitaly who put this in. > >> > > >> >> > >> >> > > >> >> >thanks! > >> >> > > >> >> > > >> >> >> --- > >> >> >> drivers/net/virtio_net.c| 6 -- > >> >> >> include/uapi/linux/virtio_net.h | 1 + > >> >> >> 2 files changed, 5 insertions(+), 2 deletions(-) > >> >> >> > >> >> >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > >> >> >> index fb5e68ed3ec2..e85b03988733 100644 > >> >> >> --- a/drivers/net/virtio_net.c > >> >> >> +++ b/drivers/net/virtio_net.c > >> >> >> @@ -62,7 +62,8 @@ static const unsigned long guest_offloads[] = { > >> >> >> VIRTIO_NET_F_GUEST_UFO, > >> >> >> VIRTIO_NET_F_GUEST_CSUM, > >> >> >> VIRTIO_NET_F_GUEST_USO4, > >> >> >> -VIRTIO_NET_F_GUEST_USO6 > >> >> >> +VIRTIO_NET_F_GUEST_USO6, > >> >> >> +VIRTIO_NET_F_GUEST_HDRLEN > >> >> >> }; > >> >> >> > >> >> >> #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << > >> >> >> VIRTIO_NET_F_GUEST_TSO4) | \ > >> >> >> @@ -4213,7 +4214,8 @@ static struct virtio_device_id id_table[] = { > >> >> >> VIRTIO_NET_F_CTRL_MAC_ADDR, \ > >> >> >> VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ > >> >> >> VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ > >> >> >> -VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, > >> >> >> VIRTIO_NET_F_NOTF_COAL > >> >> >> +VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, > >> >> >> VIRTIO_NET_F_NOTF_COAL, \ > >> >> >> +VIRTIO_NET_F_GUEST_HDRLEN > >> >> >> > >> >> >> static unsigned int features[] = { > >> >> >> VIRTNET_FEATURES, > >> >> >> diff
[linux-next:master] BUILD REGRESSION d2af0fa4bfa4ec29d03b449ccd43fee39501112d
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master branch HEAD: d2af0fa4bfa4ec29d03b449ccd43fee39501112d Add linux-next specific files for 20230220 Error/Warning reports: https://lore.kernel.org/oe-kbuild-all/202302062224.byzetxh1-...@intel.com https://lore.kernel.org/oe-kbuild-all/202302092211.54eydhyh-...@intel.com https://lore.kernel.org/oe-kbuild-all/202302111601.jty4lkra-...@intel.com https://lore.kernel.org/oe-kbuild-all/202302170355.ljqlzucu-...@intel.com https://lore.kernel.org/oe-kbuild-all/202302210017.xt59wvsm-...@intel.com https://lore.kernel.org/oe-kbuild-all/202302210350.lynwcl4t-...@intel.com Error/Warning: (recently discovered and may have been fixed) Documentation/sphinx/templates/kernel-toc.html: 1:36 Invalid token: #} ERROR: modpost: "__umoddi3" [fs/btrfs/btrfs.ko] undefined! ERROR: modpost: "devm_platform_ioremap_resource" [drivers/dma/fsl-edma.ko] undefined! ERROR: modpost: "devm_platform_ioremap_resource" [drivers/dma/idma64.ko] undefined! FAILED: load BTF from vmlinux: No data available drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_optc.c:294:6: warning: no previous prototype for 'optc3_wait_drr_doublebuffer_pending_clear' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_resource_helpers.c:62:18: warning: variable 'cursor_bpp' set but not used [-Wunused-but-set-variable] drivers/gpu/drm/amd/amdgpu/../display/dc/link/link_detection.c:1199: warning: expecting prototype for dc_link_detect_connection_type(). Prototype was for link_detect_connection_type() instead drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_capability.c:1292:32: warning: variable 'result_write_min_hblank' set but not used [-Wunused-but-set-variable] drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_training.c:1586:38: warning: variable 'result' set but not used [-Wunused-but-set-variable] drivers/net/ethernet/sfc/ef100_nic.c:1197:9: warning: variable 'rc' is uninitialized when used here [-Wuninitialized] drivers/net/ethernet/sfc/efx_devlink.c:326:58: error: expected ')' before 'build_id' drivers/net/ethernet/sfc/efx_devlink.c:338:55: error: expected ';' before '}' token drivers/of/unittest.c:3042:41: error: 'struct device_node' has no member named 'kobj' drivers/pwm/pwm-dwc.c:314:1: error: type defaults to 'int' in declaration of 'module_pci_driver' [-Werror=implicit-int] include/asm-generic/div64.h:238:36: error: passing argument 1 of '__div64_32' from incompatible pointer type [-Werror=incompatible-pointer-types] Unverified Error/Warning (likely false positive, please contact us if interested): drivers/infiniband/hw/hfi1/verbs.c:1661 hfi1_alloc_hw_device_stats() error: we previously assumed 'dev_cntr_descs' could be null (see line 1650) drivers/net/phy/phy-c45.c:712 genphy_c45_write_eee_adv() error: uninitialized symbol 'changed'. drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c:1678 rtl8188e_handle_ra_tx_report2() warn: ignoring unreachable code. drivers/usb/gadget/composite.c:2082:33: sparse: sparse: restricted __le16 degrades to integer drivers/virtio/virtio_ring.c:1585 virtqueue_add_packed_vring() error: uninitialized symbol 'prev'. drivers/virtio/virtio_ring.c:1593 virtqueue_add_packed_vring() error: uninitialized symbol 'head_flags'. drivers/virtio/virtio_ring.c:697 virtqueue_add_split_vring() error: uninitialized symbol 'prev'. pahole: .tmp_vmlinux.btf: No such file or directory Error/Warning ids grouped by kconfigs: gcc_recent_errors |-- alpha-allyesconfig | |-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_detection.c:warning:expecting-prototype-for-dc_link_detect_connection_type().-Prototype-was-for-link_detect_connection_type()-instead | |-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_capability.c:warning:variable-result_write_min_hblank-set-but-not-used | `-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_training.c:warning:variable-result-set-but-not-used |-- alpha-buildonly-randconfig-r006-20230219 | |-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_detection.c:warning:expecting-prototype-for-dc_link_detect_connection_type().-Prototype-was-for-link_detect_connection_type()-instead | |-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_capability.c:warning:variable-result_write_min_hblank-set-but-not-used | `-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_training.c:warning:variable-result-set-but-not-used |-- arc-allyesconfig | |-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-link_detection.c:warning:expecting-prototype-for-dc_link_detect_connection_type().-Prototype-was-for-link_detect_connection_type()-instead | |-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_capability.c:warning:variable-result_write_min_hblank-set-but-not-used | |-- drivers-gpu-drm-amd-amdgpu-..-display-dc-link-protocols-link_dp_trai
Re: [PATCH 17/17] drm/cirrus: Use VGA macro constants to unblank
On Mon, Feb 20, 2023 at 03:22:03PM +0100, Thomas Zimmermann wrote: > Hi > > Am 16.02.23 um 12:33 schrieb Gerd Hoffmann: > > On Wed, Feb 15, 2023 at 05:15:17PM +0100, Thomas Zimmermann wrote: > > > Set the VGA bit for unblanking with macro constants instead of magic > > > values. No functional changes. > > > > blank/unblank should work simliar to bochs (see commit 250e743915d4), > > that is maybe a nice thing to add of you modernize the driver anyway. > > > > take care, > >Gerd > > > > Do you have comments on the other patches? Checked briefly only, looked sane overall. Seems the blit and format conversions helpers improved alot since I've added them initially (don't follow drm that closely any more, busy with other stuff), nice to see cirrus being updated to that and getting dirty tracking support. Acked-by: Gerd Hoffmann take care, Gerd ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v2] vhost/vdpa: Add MSI translation tables to iommu for software-managed MSI
On Mon, Feb 20, 2023 at 10:36:27AM +0800, Jason Wang wrote: > On Fri, Feb 17, 2023 at 6:11 PM Michael S. Tsirkin wrote: > > > > On Fri, Feb 17, 2023 at 01:35:59PM +0800, Jason Wang wrote: > > > On Fri, Feb 17, 2023 at 8:15 AM Jason Gunthorpe wrote: > > > > > > > > On Tue, Feb 07, 2023 at 08:08:43PM +0800, Nanyong Sun wrote: > > > > > From: Rong Wang > > > > > > > > > > Once enable iommu domain for one device, the MSI > > > > > translation tables have to be there for software-managed MSI. > > > > > Otherwise, platform with software-managed MSI without an > > > > > irq bypass function, can not get a correct memory write event > > > > > from pcie, will not get irqs. > > > > > The solution is to obtain the MSI phy base address from > > > > > iommu reserved region, and set it to iommu MSI cookie, > > > > > then translation tables will be created while request irq. > > > > > > > > Probably not what anyone wants to hear, but I would prefer we not add > > > > more uses of this stuff. It looks like we have to get rid of > > > > iommu_get_msi_cookie() :\ > > > > > > > > I'd like it if vdpa could move to iommufd not keep copying stuff from > > > > it.. > > > > > > Yes, but we probably need a patch for -stable. > > > > Hmm do we? this looks like it's enabling new platforms is not a bugfix... > > I think we haven't limited vDPA to any specific arch in the past? > > Thanks No, but it still fails gracefully right? Anyway, this will need iommu maintainer's ack. We'll see. > > > > > > > > > > Also the iommu_group_has_isolated_msi() check is missing on the vdpa > > > > path, and it is missing the iommu ownership mechanism. > > > > > > Ok. > > > > > > > > > > > Also which in-tree VDPA driver that uses the iommu runs on ARM? Please > > > > > > ifcvf and vp_vpda are two drivers that use platform IOMMU. > > > > > > Thanks > > > > > > > don't propose core changes for unmerged drivers. :( > > > > > > > > Jason > > > > > > ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH 17/17] drm/cirrus: Use VGA macro constants to unblank
Hi Am 16.02.23 um 12:33 schrieb Gerd Hoffmann: On Wed, Feb 15, 2023 at 05:15:17PM +0100, Thomas Zimmermann wrote: Set the VGA bit for unblanking with macro constants instead of magic values. No functional changes. blank/unblank should work simliar to bochs (see commit 250e743915d4), that is maybe a nice thing to add of you modernize the driver anyway. take care, Gerd Do you have comments on the other patches? Best regards Thomas -- Thomas Zimmermann Graphics Driver Developer SUSE Software Solutions Germany GmbH Maxfeldstr. 5, 90409 Nürnberg, Germany (HRB 36809, AG Nürnberg) Geschäftsführer: Ivo Totev OpenPGP_signature Description: OpenPGP digital signature ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [patch net-next] net: virtio_net: implement exact header length guest feature
Mon, Feb 20, 2023 at 01:55:33PM CET, m...@redhat.com wrote: >On Mon, Feb 20, 2023 at 09:35:00AM +0100, Jiri Pirko wrote: >> Fri, Feb 17, 2023 at 02:47:36PM CET, m...@redhat.com wrote: >> >On Fri, Feb 17, 2023 at 01:53:55PM +0100, Jiri Pirko wrote: >> >> Fri, Feb 17, 2023 at 01:22:01PM CET, m...@redhat.com wrote: >> >> >On Fri, Feb 17, 2023 at 01:15:47PM +0100, Jiri Pirko wrote: >> >> >> From: Jiri Pirko >> >> >> >> >> >> virtio_net_hdr_from_skb() fills up hdr_len to skb_headlen(skb). >> >> >> >> >> >> Virtio spec introduced a feature VIRTIO_NET_F_GUEST_HDRLEN which when >> >> >> set implicates that the driver provides the exact size of the header. >> >> >> >> >> >> The driver already complies to fill the correct value. Introduce the >> >> >> feature and advertise it. >> >> >> >> >> >> Signed-off-by: Jiri Pirko >> >> > >> >> >Could you add a bit of motivation just for the record? >> >> >Does this improve performance for some card? By how much? >> >> >Expected to help some future card? >> >> >> >> I can get that info, but isn't that rather something to be appended to >> >> the virtio-spec patch? I mean, the feature is there, this is just >> >> implementing it in one driver. >> > >> >It is more like using it in the driver. It's not like we have to use >> >everything - it could be useful for e.g. dpdk but not linux. >> >Implementing it in the Linux driver has support costs - for example what >> >if there's a bug and sometimes the length is incorrect? >> >We'll be breaking things. >> >> I understand. To my understanding this feature just fixes the original >> ambiguity in the virtio spec. >> >> Quoting the original virtio spec: >> "hdr_len is a hint to the device as to how much of the header needs to >> be kept to copy into each packet" >> >> "a hint" might not be clear for the reader what does it mean, if it is >> "maybe like that" of "exactly like that". This feature just makes it >> crystal clear. >> >> If you look at the tap implementation, it uses hdr_len to alloc >> skb linear part. No hint, it counts with the provided value. >> So if the driver is currently not precise, it breaks tap. > >Well that's only for gso though right? Yep. >And making it bigger than necessary works fine ... Well yeah. But tap does not do that, does it? it uses hdr_len directly. > >> I will add this to the patch description and send v2. >> > >I feel this does not answer the question yet, or maybe I am being dense. >My point was not about making hdr_len precise. My point was that we are >making a change here for no apparent reason. I am guessing you are not >doing it for fun - so why? Is there a device with this feature bit >you are aware of? Afaik real hw which does emulation of virtio_net would benefit from that, our hw including. > > > >> >> > >> >The patch was submitted by Marvell but they never bothered with >> >using it in Linux. I guess they are using it for something else? >> >CC Vitaly who put this in. >> > >> >> >> >> > >> >> >thanks! >> >> > >> >> > >> >> >> --- >> >> >> drivers/net/virtio_net.c| 6 -- >> >> >> include/uapi/linux/virtio_net.h | 1 + >> >> >> 2 files changed, 5 insertions(+), 2 deletions(-) >> >> >> >> >> >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c >> >> >> index fb5e68ed3ec2..e85b03988733 100644 >> >> >> --- a/drivers/net/virtio_net.c >> >> >> +++ b/drivers/net/virtio_net.c >> >> >> @@ -62,7 +62,8 @@ static const unsigned long guest_offloads[] = { >> >> >>VIRTIO_NET_F_GUEST_UFO, >> >> >>VIRTIO_NET_F_GUEST_CSUM, >> >> >>VIRTIO_NET_F_GUEST_USO4, >> >> >> - VIRTIO_NET_F_GUEST_USO6 >> >> >> + VIRTIO_NET_F_GUEST_USO6, >> >> >> + VIRTIO_NET_F_GUEST_HDRLEN >> >> >> }; >> >> >> >> >> >> #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) >> >> >> | \ >> >> >> @@ -4213,7 +4214,8 @@ static struct virtio_device_id id_table[] = { >> >> >>VIRTIO_NET_F_CTRL_MAC_ADDR, \ >> >> >>VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ >> >> >>VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ >> >> >> - VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, >> >> >> VIRTIO_NET_F_NOTF_COAL >> >> >> + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, >> >> >> VIRTIO_NET_F_NOTF_COAL, \ >> >> >> + VIRTIO_NET_F_GUEST_HDRLEN >> >> >> >> >> >> static unsigned int features[] = { >> >> >>VIRTNET_FEATURES, >> >> >> diff --git a/include/uapi/linux/virtio_net.h >> >> >> b/include/uapi/linux/virtio_net.h >> >> >> index b4062bed186a..12c1c9699935 100644 >> >> >> --- a/include/uapi/linux/virtio_net.h >> >> >> +++ b/include/uapi/linux/virtio_net.h >> >> >> @@ -61,6 +61,7 @@ >> >> >> #define VIRTIO_NET_F_GUEST_USO6 55 /* Guest can handle >> >> >> USOv6 in. */ >> >> >> #define VIRTIO_NET_F_HOST_USO 56 /* Host can handle USO in. */ >> >> >> #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ >> >> >> +#define VIRTIO_NET_F_GUEST_HDRLEN 59 /* Guest
Re: [patch net-next] net: virtio_net: implement exact header length guest feature
On Mon, Feb 20, 2023 at 09:35:00AM +0100, Jiri Pirko wrote: > Fri, Feb 17, 2023 at 02:47:36PM CET, m...@redhat.com wrote: > >On Fri, Feb 17, 2023 at 01:53:55PM +0100, Jiri Pirko wrote: > >> Fri, Feb 17, 2023 at 01:22:01PM CET, m...@redhat.com wrote: > >> >On Fri, Feb 17, 2023 at 01:15:47PM +0100, Jiri Pirko wrote: > >> >> From: Jiri Pirko > >> >> > >> >> virtio_net_hdr_from_skb() fills up hdr_len to skb_headlen(skb). > >> >> > >> >> Virtio spec introduced a feature VIRTIO_NET_F_GUEST_HDRLEN which when > >> >> set implicates that the driver provides the exact size of the header. > >> >> > >> >> The driver already complies to fill the correct value. Introduce the > >> >> feature and advertise it. > >> >> > >> >> Signed-off-by: Jiri Pirko > >> > > >> >Could you add a bit of motivation just for the record? > >> >Does this improve performance for some card? By how much? > >> >Expected to help some future card? > >> > >> I can get that info, but isn't that rather something to be appended to > >> the virtio-spec patch? I mean, the feature is there, this is just > >> implementing it in one driver. > > > >It is more like using it in the driver. It's not like we have to use > >everything - it could be useful for e.g. dpdk but not linux. > >Implementing it in the Linux driver has support costs - for example what > >if there's a bug and sometimes the length is incorrect? > >We'll be breaking things. > > I understand. To my understanding this feature just fixes the original > ambiguity in the virtio spec. > > Quoting the original virtio spec: > "hdr_len is a hint to the device as to how much of the header needs to > be kept to copy into each packet" > > "a hint" might not be clear for the reader what does it mean, if it is > "maybe like that" of "exactly like that". This feature just makes it > crystal clear. > > If you look at the tap implementation, it uses hdr_len to alloc > skb linear part. No hint, it counts with the provided value. > So if the driver is currently not precise, it breaks tap. Well that's only for gso though right? And making it bigger than necessary works fine ... > I will add this to the patch description and send v2. > I feel this does not answer the question yet, or maybe I am being dense. My point was not about making hdr_len precise. My point was that we are making a change here for no apparent reason. I am guessing you are not doing it for fun - so why? Is there a device with this feature bit you are aware of? > > > > >The patch was submitted by Marvell but they never bothered with > >using it in Linux. I guess they are using it for something else? > >CC Vitaly who put this in. > > > >> > >> > > >> >thanks! > >> > > >> > > >> >> --- > >> >> drivers/net/virtio_net.c| 6 -- > >> >> include/uapi/linux/virtio_net.h | 1 + > >> >> 2 files changed, 5 insertions(+), 2 deletions(-) > >> >> > >> >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > >> >> index fb5e68ed3ec2..e85b03988733 100644 > >> >> --- a/drivers/net/virtio_net.c > >> >> +++ b/drivers/net/virtio_net.c > >> >> @@ -62,7 +62,8 @@ static const unsigned long guest_offloads[] = { > >> >> VIRTIO_NET_F_GUEST_UFO, > >> >> VIRTIO_NET_F_GUEST_CSUM, > >> >> VIRTIO_NET_F_GUEST_USO4, > >> >> - VIRTIO_NET_F_GUEST_USO6 > >> >> + VIRTIO_NET_F_GUEST_USO6, > >> >> + VIRTIO_NET_F_GUEST_HDRLEN > >> >> }; > >> >> > >> >> #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | > >> >> \ > >> >> @@ -4213,7 +4214,8 @@ static struct virtio_device_id id_table[] = { > >> >> VIRTIO_NET_F_CTRL_MAC_ADDR, \ > >> >> VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ > >> >> VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ > >> >> - VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, > >> >> VIRTIO_NET_F_NOTF_COAL > >> >> + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, > >> >> VIRTIO_NET_F_NOTF_COAL, \ > >> >> + VIRTIO_NET_F_GUEST_HDRLEN > >> >> > >> >> static unsigned int features[] = { > >> >> VIRTNET_FEATURES, > >> >> diff --git a/include/uapi/linux/virtio_net.h > >> >> b/include/uapi/linux/virtio_net.h > >> >> index b4062bed186a..12c1c9699935 100644 > >> >> --- a/include/uapi/linux/virtio_net.h > >> >> +++ b/include/uapi/linux/virtio_net.h > >> >> @@ -61,6 +61,7 @@ > >> >> #define VIRTIO_NET_F_GUEST_USO655 /* Guest can handle > >> >> USOv6 in. */ > >> >> #define VIRTIO_NET_F_HOST_USO 56 /* Host can handle USO in. */ > >> >> #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > >> >> +#define VIRTIO_NET_F_GUEST_HDRLEN 59 /* Guest provides the exact > >> >> hdr_len value. */ > >> >> #define VIRTIO_NET_F_RSS 60/* Supports RSS RX steering */ > >> >> #define VIRTIO_NET_F_RSC_EXT 61/* extended coalescing info */ > >> >> #define VIRTIO_NET_F_STANDBY 62/* Act as standby for another > >> >> device > >> >> -- > >> >> 2.39.0 > >> > > >
Re: [PATCH vhost 01/10] virtio_ring: split: refactor virtqueue_add_split() for premapped
On Mon, Feb 20, 2023 at 01:37:37PM +0800, Jason Wang wrote: > On Tue, Feb 14, 2023 at 3:27 PM Xuan Zhuo wrote: > > > > DMA-related logic is separated from the virtqueue_add_split to prepare > > for subsequent support for premapped. > > The patch seems to do more than what is described here. > > To simplify reviewers, I'd suggest to split this patch into three: > > 1) virtqueue_add_split_prepare() (could we have a better name?) > 2) virtqueue_map_sgs() > 3) virtqueue_add_split_vring() > > (Or only factor DMA parts out, I haven't gone through the reset of the > patches) > > Thanks > It's pretty small, even split is not mandatary imho. But definitely please do document what is done fully. > > > > Signed-off-by: Xuan Zhuo > > --- > > drivers/virtio/virtio_ring.c | 219 --- > > 1 file changed, 152 insertions(+), 67 deletions(-) > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > > index 41144b5246a8..560ee30d942c 100644 > > --- a/drivers/virtio/virtio_ring.c > > +++ b/drivers/virtio/virtio_ring.c > > @@ -520,29 +520,83 @@ static inline unsigned int > > virtqueue_add_desc_split(struct virtqueue *vq, > > return next; > > } > > > > -static inline int virtqueue_add_split(struct virtqueue *_vq, > > - struct scatterlist *sgs[], > > - unsigned int total_sg, > > - unsigned int out_sgs, > > - unsigned int in_sgs, > > - void *data, > > - void *ctx, > > - gfp_t gfp) > > +static int virtqueue_map_sgs(struct vring_virtqueue *vq, > > +struct scatterlist *sgs[], > > +unsigned int total_sg, > > +unsigned int out_sgs, > > +unsigned int in_sgs) > > { > > - struct vring_virtqueue *vq = to_vvq(_vq); > > struct scatterlist *sg; > > - struct vring_desc *desc; > > - unsigned int i, n, avail, descs_used, prev, err_idx; > > - int head; > > - bool indirect; > > + unsigned int n; > > > > - START_USE(vq); > > + for (n = 0; n < out_sgs; n++) { > > + for (sg = sgs[n]; sg; sg = sg_next(sg)) { > > + dma_addr_t addr = vring_map_one_sg(vq, sg, > > DMA_TO_DEVICE); > > + > > + if (vring_mapping_error(vq, addr)) > > + return -ENOMEM; > > + > > + sg->dma_address = addr; > > + } > > + } > > + for (; n < (out_sgs + in_sgs); n++) { > > + for (sg = sgs[n]; sg; sg = sg_next(sg)) { > > + dma_addr_t addr = vring_map_one_sg(vq, sg, > > DMA_FROM_DEVICE); > > + > > + if (vring_mapping_error(vq, addr)) > > + return -ENOMEM; > > + > > + sg->dma_address = addr; > > + } > > + } > > + > > + return 0; > > +} > > + > > +static void virtqueue_unmap_sgs(struct vring_virtqueue *vq, > > + struct scatterlist *sgs[], > > + unsigned int total_sg, > > + unsigned int out_sgs, > > + unsigned int in_sgs) > > +{ > > + struct scatterlist *sg; > > + unsigned int n; > > + > > + for (n = 0; n < out_sgs; n++) { > > + for (sg = sgs[n]; sg; sg = sg_next(sg)) { > > + if (!sg->dma_address) > > + return; > > + > > + dma_unmap_single(vring_dma_dev(vq), sg->dma_address, > > +sg->length, DMA_TO_DEVICE); > > + } > > + } > > + for (; n < (out_sgs + in_sgs); n++) { > > + for (sg = sgs[n]; sg; sg = sg_next(sg)) { > > + if (!sg->dma_address) > > + return; > > + > > + dma_unmap_single(vring_dma_dev(vq), sg->dma_address, > > +sg->length, DMA_FROM_DEVICE); > > + } > > + } > > +} > > + > > +static inline int virtqueue_add_split_prepare(struct vring_virtqueue *vq, > > + unsigned int total_sg, > > + unsigned int out_sgs, > > + void *data, > > + void *ctx, > > + gfp_t gfp, > > + struct vring_desc **pdesc) > > +{ > > + struct vring_desc *desc; > > + unsigned int descs_used; > > > > BUG_ON(data == NULL); > >
Re: [PATCH] drm/gem: Expose the buffer object handle to userspace last
Am 20.02.23 um 10:55 schrieb Tvrtko Ursulin: Hi, On 14/02/2023 13:59, Christian König wrote: Am 14.02.23 um 13:50 schrieb Tvrtko Ursulin: From: Tvrtko Ursulin Currently drm_gem_handle_create_tail exposes the handle to userspace before the buffer object constructions is complete. This allowing of working against a partially constructed object, which may also be in the process of having its creation fail, can have a range of negative outcomes. A lot of those will depend on what the individual drivers are doing in their obj->funcs->open() callbacks, and also with a common failure mode being -ENOMEM from drm_vma_node_allow. We can make sure none of this can happen by allocating a handle last, although with a downside that more of the function now runs under the dev->object_name_lock. Looking into the individual drivers open() hooks, we have amdgpu_gem_object_open which seems like it could have a potential security issue without this change. A couple drivers like qxl_gem_object_open and vmw_gem_object_open implement no-op hooks so no impact for them. A bunch of other require a deeper look by individual owners to asses for impact. Those are lima_gem_object_open, nouveau_gem_object_open, panfrost_gem_open, radeon_gem_object_open and virtio_gpu_gem_object_open. Putting aside the risk assesment of the above, some common scenarios to think about are along these lines: 1) Userspace closes a handle by speculatively "guessing" it from a second thread. This results in an unreachable buffer object so, a memory leak. 2) Same as 1), but object is in the process of getting closed (failed creation). The second thread is then able to re-cycle the handle and idr_remove would in the first thread would then remove the handle it does not own from the idr. 3) Going back to the earlier per driver problem space - individual impact assesment of allowing a second thread to access and operate on a partially constructed handle / object. (Can something crash? Leak information?) In terms of identifying when the problem started I will tag some patches as references, but not all, if even any, of them actually point to a broken state. I am just identifying points at which more opportunity for issues to arise was added. Yes I've looked into this once as well, but couldn't completely solve it for some reason. Give me a day or two to get this tested and all the logic swapped back into my head again. Managed to recollect what the problem with earlier attempts was? Nope, that's way to long ago. I can only assume that I ran into problems with the object_name_lock. Probably best to double check if that doesn't result in a lock inversion when somebody grabs the reservation lock in their ->load() callback. Regards, Christian. Regards, Tvrtko Christian. References: 304eda32920b ("drm/gem: add hooks to notify driver when object handle is created/destroyed") References: ca481c9b2a3a ("drm/gem: implement vma access management") References: b39b5394fabc ("drm/gem: Add drm_gem_object_funcs") Cc: dri-de...@lists.freedesktop.org Cc: Rob Clark Cc: Ben Skeggs Cc: David Herrmann Cc: Noralf Trønnes Cc: David Airlie Cc: Daniel Vetter Cc: amd-...@lists.freedesktop.org Cc: l...@lists.freedesktop.org Cc: nouv...@lists.freedesktop.org Cc: Steven Price Cc: virtualization@lists.linux-foundation.org Cc: spice-de...@lists.freedesktop.org Cc: Zack Rusin --- drivers/gpu/drm/drm_gem.c | 48 +++ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index aa15c52ae182..e3d897bca0f2 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -356,52 +356,52 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, u32 *handlep) { struct drm_device *dev = obj->dev; - u32 handle; int ret; WARN_ON(!mutex_is_locked(>object_name_lock)); if (obj->handle_count++ == 0) drm_gem_object_get(obj); + ret = drm_vma_node_allow(>vma_node, file_priv); + if (ret) + goto err_put; + + if (obj->funcs->open) { + ret = obj->funcs->open(obj, file_priv); + if (ret) + goto err_revoke; + } + /* - * Get the user-visible handle using idr. Preload and perform - * allocation under our spinlock. + * Get the user-visible handle using idr as the _last_ step. + * Preload and perform allocation under our spinlock. */ idr_preload(GFP_KERNEL); spin_lock(_priv->table_lock); - ret = idr_alloc(_priv->object_idr, obj, 1, 0, GFP_NOWAIT); - spin_unlock(_priv->table_lock); idr_preload_end(); - mutex_unlock(>object_name_lock); if (ret < 0) - goto err_unref; - - handle = ret; + goto err_close; - ret = drm_vma_node_allow(>vma_node, file_priv); - if (ret) - goto err_remove; + mutex_unlock(>object_name_lock); - if (obj->funcs->open) { -
Re: [patch net-next] net: virtio_net: implement exact header length guest feature
Fri, Feb 17, 2023 at 02:47:36PM CET, m...@redhat.com wrote: >On Fri, Feb 17, 2023 at 01:53:55PM +0100, Jiri Pirko wrote: >> Fri, Feb 17, 2023 at 01:22:01PM CET, m...@redhat.com wrote: >> >On Fri, Feb 17, 2023 at 01:15:47PM +0100, Jiri Pirko wrote: >> >> From: Jiri Pirko >> >> >> >> virtio_net_hdr_from_skb() fills up hdr_len to skb_headlen(skb). >> >> >> >> Virtio spec introduced a feature VIRTIO_NET_F_GUEST_HDRLEN which when >> >> set implicates that the driver provides the exact size of the header. >> >> >> >> The driver already complies to fill the correct value. Introduce the >> >> feature and advertise it. >> >> >> >> Signed-off-by: Jiri Pirko >> > >> >Could you add a bit of motivation just for the record? >> >Does this improve performance for some card? By how much? >> >Expected to help some future card? >> >> I can get that info, but isn't that rather something to be appended to >> the virtio-spec patch? I mean, the feature is there, this is just >> implementing it in one driver. > >It is more like using it in the driver. It's not like we have to use >everything - it could be useful for e.g. dpdk but not linux. >Implementing it in the Linux driver has support costs - for example what >if there's a bug and sometimes the length is incorrect? >We'll be breaking things. I understand. To my understanding this feature just fixes the original ambiguity in the virtio spec. Quoting the original virtio spec: "hdr_len is a hint to the device as to how much of the header needs to be kept to copy into each packet" "a hint" might not be clear for the reader what does it mean, if it is "maybe like that" of "exactly like that". This feature just makes it crystal clear. If you look at the tap implementation, it uses hdr_len to alloc skb linear part. No hint, it counts with the provided value. So if the driver is currently not precise, it breaks tap. I will add this to the patch description and send v2. > >The patch was submitted by Marvell but they never bothered with >using it in Linux. I guess they are using it for something else? >CC Vitaly who put this in. > >> >> > >> >thanks! >> > >> > >> >> --- >> >> drivers/net/virtio_net.c| 6 -- >> >> include/uapi/linux/virtio_net.h | 1 + >> >> 2 files changed, 5 insertions(+), 2 deletions(-) >> >> >> >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c >> >> index fb5e68ed3ec2..e85b03988733 100644 >> >> --- a/drivers/net/virtio_net.c >> >> +++ b/drivers/net/virtio_net.c >> >> @@ -62,7 +62,8 @@ static const unsigned long guest_offloads[] = { >> >> VIRTIO_NET_F_GUEST_UFO, >> >> VIRTIO_NET_F_GUEST_CSUM, >> >> VIRTIO_NET_F_GUEST_USO4, >> >> - VIRTIO_NET_F_GUEST_USO6 >> >> + VIRTIO_NET_F_GUEST_USO6, >> >> + VIRTIO_NET_F_GUEST_HDRLEN >> >> }; >> >> >> >> #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ >> >> @@ -4213,7 +4214,8 @@ static struct virtio_device_id id_table[] = { >> >> VIRTIO_NET_F_CTRL_MAC_ADDR, \ >> >> VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ >> >> VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ >> >> - VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL >> >> + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \ >> >> + VIRTIO_NET_F_GUEST_HDRLEN >> >> >> >> static unsigned int features[] = { >> >> VIRTNET_FEATURES, >> >> diff --git a/include/uapi/linux/virtio_net.h >> >> b/include/uapi/linux/virtio_net.h >> >> index b4062bed186a..12c1c9699935 100644 >> >> --- a/include/uapi/linux/virtio_net.h >> >> +++ b/include/uapi/linux/virtio_net.h >> >> @@ -61,6 +61,7 @@ >> >> #define VIRTIO_NET_F_GUEST_USO6 55 /* Guest can handle USOv6 in. */ >> >> #define VIRTIO_NET_F_HOST_USO56 /* Host can handle USO in. */ >> >> #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ >> >> +#define VIRTIO_NET_F_GUEST_HDRLEN 59/* Guest provides the exact >> >> hdr_len value. */ >> >> #define VIRTIO_NET_F_RSS 60/* Supports RSS RX steering */ >> >> #define VIRTIO_NET_F_RSC_EXT 61/* extended coalescing info */ >> >> #define VIRTIO_NET_F_STANDBY 62/* Act as standby for another >> >> device >> >> -- >> >> 2.39.0 >> > > ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization