Re: [virtio-dev] Re: [PATCH 5/6] virtio/vsock: add support for dgram

2022-08-16 Thread Arseniy Krasnov
On 17.08.2022 08:01, Arseniy Krasnov wrote:
> On 16.08.2022 05:32, Bobby Eshleman wrote:
>> CC'ing virtio-dev@lists.oasis-open.org
>>
>> On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
>>> This patch supports dgram in virtio and on the vhost side.
> Hello,
> 
> sorry, i don't understand, how this maintains message boundaries? Or it
> is unnecessary for SOCK_DGRAM?
> 
> Thanks
>>>
>>> Signed-off-by: Jiang Wang 
>>> Signed-off-by: Bobby Eshleman 
>>> ---
>>>  drivers/vhost/vsock.c   |   2 +-
>>>  include/net/af_vsock.h  |   2 +
>>>  include/uapi/linux/virtio_vsock.h   |   1 +
>>>  net/vmw_vsock/af_vsock.c|  26 +++-
>>>  net/vmw_vsock/virtio_transport.c|   2 +-
>>>  net/vmw_vsock/virtio_transport_common.c | 173 ++--
>>>  6 files changed, 186 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>>> index a5d1bdb786fe..3dc72a5647ca 100644
>>> --- a/drivers/vhost/vsock.c
>>> +++ b/drivers/vhost/vsock.c
>>> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
>>> int ret;
>>>  
>>> ret = vsock_core_register(_transport.transport,
>>> - VSOCK_TRANSPORT_F_H2G);
>>> + VSOCK_TRANSPORT_F_H2G | 
>>> VSOCK_TRANSPORT_F_DGRAM);
>>> if (ret < 0)
>>> return ret;
>>>  
>>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>>> index 1c53c4c4d88f..37e55c81e4df 100644
>>> --- a/include/net/af_vsock.h
>>> +++ b/include/net/af_vsock.h
>>> @@ -78,6 +78,8 @@ struct vsock_sock {
>>>  s64 vsock_stream_has_data(struct vsock_sock *vsk);
>>>  s64 vsock_stream_has_space(struct vsock_sock *vsk);
>>>  struct sock *vsock_create_connected(struct sock *parent);
>>> +int vsock_bind_stream(struct vsock_sock *vsk,
>>> + struct sockaddr_vm *addr);
>>>  
>>>  / TRANSPORT /
>>>  
>>> diff --git a/include/uapi/linux/virtio_vsock.h 
>>> b/include/uapi/linux/virtio_vsock.h
>>> index 857df3a3a70d..0975b9c88292 100644
>>> --- a/include/uapi/linux/virtio_vsock.h
>>> +++ b/include/uapi/linux/virtio_vsock.h
>>> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
>>>  enum virtio_vsock_type {
>>> VIRTIO_VSOCK_TYPE_STREAM = 1,
>>> VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
>>> +   VIRTIO_VSOCK_TYPE_DGRAM = 3,
>>>  };
>>>  
>>>  enum virtio_vsock_op {
>>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>>> index 1893f8aafa48..87e4ae1866d3 100644
>>> --- a/net/vmw_vsock/af_vsock.c
>>> +++ b/net/vmw_vsock/af_vsock.c
>>> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock 
>>> *vsk,
>>> return 0;
>>>  }
>>>  
>>> +int vsock_bind_stream(struct vsock_sock *vsk,
>>> + struct sockaddr_vm *addr)
>>> +{
>>> +   int retval;
>>> +
>>> +   spin_lock_bh(_table_lock);
>>> +   retval = __vsock_bind_connectible(vsk, addr);
>>> +   spin_unlock_bh(_table_lock);
>>> +
>>> +   return retval;
>>> +}
>>> +EXPORT_SYMBOL(vsock_bind_stream);
>>> +
>>>  static int __vsock_bind_dgram(struct vsock_sock *vsk,
>>>   struct sockaddr_vm *addr)
>>>  {
>>> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct 
>>> vsock_transport *t, int features)
>>> }
>>>  
>>> if (features & VSOCK_TRANSPORT_F_DGRAM) {
>>> -   if (t_dgram) {
>>> -   err = -EBUSY;
>>> -   goto err_busy;
>>> +   /* TODO: always chose the G2H variant over others, support 
>>> nesting later */
>>> +   if (features & VSOCK_TRANSPORT_F_G2H) {
>>> +   if (t_dgram)
>>> +   pr_warn("virtio_vsock: t_dgram already set\n");
>>> +   t_dgram = t;
>>> +   }
>>> +
>>> +   if (!t_dgram) {
>>> +   t_dgram = t;
>>> }
>>> -   t_dgram = t;
>>> }
>>>  
>>> if (features & VSOCK_TRANSPORT_F_LOCAL) {
>>> diff --git a/net/vmw_vsock/virtio_transport.c 
>>> b/net/vmw_vsock/virtio_transport.c
>>> index 073314312683..d4526ca462d2 100644
>>> --- a/net/vmw_vsock/virtio_transport.c
>>> +++ b/net/vmw_vsock/virtio_transport.c
>>> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
>>> return -ENOMEM;
>>>  
>>> ret = vsock_core_register(_transport.transport,
>>> - VSOCK_TRANSPORT_F_G2H);
>>> + VSOCK_TRANSPORT_F_G2H | 
>>> VSOCK_TRANSPORT_F_DGRAM);
>>> if (ret)
>>> goto out_wq;
>>>  
>>> diff --git a/net/vmw_vsock/virtio_transport_common.c 
>>> b/net/vmw_vsock/virtio_transport_common.c
>>> index bdf16fff054f..aedb48728677 100644
>>> --- a/net/vmw_vsock/virtio_transport_common.c
>>> +++ b/net/vmw_vsock/virtio_transport_common.c
>>> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
>>>  
>>>  static u16 virtio_transport_get_type(struct sock *sk)
>>>  {
>>> -   if (sk->sk_type == SOCK_STREAM)
>>> +   if (sk->sk_type == 

Re: [virtio-dev] Re: [PATCH 2/6] vsock: return errors other than -ENOMEM to socket

2022-08-16 Thread Arseniy Krasnov
On 16.08.2022 05:30, Bobby Eshleman wrote:
> CC'ing virtio-dev@lists.oasis-open.org
> 
> On Mon, Aug 15, 2022 at 10:56:05AM -0700, Bobby Eshleman wrote:
>> This commit allows vsock implementations to return errors
>> to the socket layer other than -ENOMEM. One immediate effect
>> of this is that upon the sk_sndbuf threshold being reached -EAGAIN
>> will be returned and userspace may throttle appropriately.
>>
>> Resultingly, a known issue with uperf is resolved[1].
>>
>> Additionally, to preserve legacy behavior for non-virtio
>> implementations, hyperv/vmci force errors to be -ENOMEM so that behavior
>> is unchanged.
>>
>> [1]: https://gitlab.com/vsock/vsock/-/issues/1
>>
>> Signed-off-by: Bobby Eshleman 
>> ---
>>  include/linux/virtio_vsock.h| 3 +++
>>  net/vmw_vsock/af_vsock.c| 3 ++-
>>  net/vmw_vsock/hyperv_transport.c| 2 +-
>>  net/vmw_vsock/virtio_transport_common.c | 3 ---
>>  net/vmw_vsock/vmci_transport.c  | 9 -
>>  5 files changed, 14 insertions(+), 6 deletions(-)
>>
>> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
>> index 17ed01466875..9a37eddbb87a 100644
>> --- a/include/linux/virtio_vsock.h
>> +++ b/include/linux/virtio_vsock.h
>> @@ -8,6 +8,9 @@
>>  #include 
>>  #include 
>>  
>> +/* Threshold for detecting small packets to copy */
>> +#define GOOD_COPY_LEN  128
>> +
>>  enum virtio_vsock_metadata_flags {
>>  VIRTIO_VSOCK_METADATA_FLAGS_REPLY   = BIT(0),
>>  VIRTIO_VSOCK_METADATA_FLAGS_TAP_DELIVERED   = BIT(1),
>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>> index e348b2d09eac..1893f8aafa48 100644
>> --- a/net/vmw_vsock/af_vsock.c
>> +++ b/net/vmw_vsock/af_vsock.c
>> @@ -1844,8 +1844,9 @@ static int vsock_connectible_sendmsg(struct socket 
>> *sock, struct msghdr *msg,
>>  written = transport->stream_enqueue(vsk,
>>  msg, len - total_written);
>>  }
>> +
>>  if (written < 0) {
>> -err = -ENOMEM;
>> +err = written;
>>  goto out_err;
>>  }
IIUC, for stream, this thing will have effect, only one first transport access 
fails. In this
case 'total_written' will be 0, so 'err' == 'written' will be returned. But 
when 'total_written > 0',
'err' will be overwritten by 'total_written' below, preserving current 
behaviour. Is it what You
supposed?

Thanks
>>  
>> diff --git a/net/vmw_vsock/hyperv_transport.c 
>> b/net/vmw_vsock/hyperv_transport.c
>> index fd98229e3db3..e99aea571f6f 100644
>> --- a/net/vmw_vsock/hyperv_transport.c
>> +++ b/net/vmw_vsock/hyperv_transport.c
>> @@ -687,7 +687,7 @@ static ssize_t hvs_stream_enqueue(struct vsock_sock 
>> *vsk, struct msghdr *msg,
>>  if (bytes_written)
>>  ret = bytes_written;
>>  kfree(send_buf);
>> -return ret;
>> +return ret < 0 ? -ENOMEM : ret;
>>  }
>>  
>>  static s64 hvs_stream_has_data(struct vsock_sock *vsk)
>> diff --git a/net/vmw_vsock/virtio_transport_common.c 
>> b/net/vmw_vsock/virtio_transport_common.c
>> index 920578597bb9..d5780599fe93 100644
>> --- a/net/vmw_vsock/virtio_transport_common.c
>> +++ b/net/vmw_vsock/virtio_transport_common.c
>> @@ -23,9 +23,6 @@
>>  /* How long to wait for graceful shutdown of a connection */
>>  #define VSOCK_CLOSE_TIMEOUT (8 * HZ)
>>  
>> -/* Threshold for detecting small packets to copy */
>> -#define GOOD_COPY_LEN  128
>> -
>>  static const struct virtio_transport *
>>  virtio_transport_get_ops(struct vsock_sock *vsk)
>>  {
>> diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
>> index b14f0ed7427b..c927a90dc859 100644
>> --- a/net/vmw_vsock/vmci_transport.c
>> +++ b/net/vmw_vsock/vmci_transport.c
>> @@ -1838,7 +1838,14 @@ static ssize_t vmci_transport_stream_enqueue(
>>  struct msghdr *msg,
>>  size_t len)
>>  {
>> -return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0);
>> +int err;
>> +
>> +err = vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0);
>> +
>> +if (err < 0)
>> +err = -ENOMEM;
>> +
>> +return err;
>>  }
>>  
>>  static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk)
>> -- 
>> 2.35.1
>>
> 
> -
> To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org
> For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org
> 



Re: [virtio-dev] Re: [PATCH 5/6] virtio/vsock: add support for dgram

2022-08-16 Thread Arseniy Krasnov
On 16.08.2022 05:32, Bobby Eshleman wrote:
> CC'ing virtio-dev@lists.oasis-open.org
> 
> On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
>> This patch supports dgram in virtio and on the vhost side.
Hello,

sorry, i don't understand, how this maintains message boundaries? Or it
is unnecessary for SOCK_DGRAM?

Thanks
>>
>> Signed-off-by: Jiang Wang 
>> Signed-off-by: Bobby Eshleman 
>> ---
>>  drivers/vhost/vsock.c   |   2 +-
>>  include/net/af_vsock.h  |   2 +
>>  include/uapi/linux/virtio_vsock.h   |   1 +
>>  net/vmw_vsock/af_vsock.c|  26 +++-
>>  net/vmw_vsock/virtio_transport.c|   2 +-
>>  net/vmw_vsock/virtio_transport_common.c | 173 ++--
>>  6 files changed, 186 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>> index a5d1bdb786fe..3dc72a5647ca 100644
>> --- a/drivers/vhost/vsock.c
>> +++ b/drivers/vhost/vsock.c
>> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
>>  int ret;
>>  
>>  ret = vsock_core_register(_transport.transport,
>> -  VSOCK_TRANSPORT_F_H2G);
>> +  VSOCK_TRANSPORT_F_H2G | 
>> VSOCK_TRANSPORT_F_DGRAM);
>>  if (ret < 0)
>>  return ret;
>>  
>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>> index 1c53c4c4d88f..37e55c81e4df 100644
>> --- a/include/net/af_vsock.h
>> +++ b/include/net/af_vsock.h
>> @@ -78,6 +78,8 @@ struct vsock_sock {
>>  s64 vsock_stream_has_data(struct vsock_sock *vsk);
>>  s64 vsock_stream_has_space(struct vsock_sock *vsk);
>>  struct sock *vsock_create_connected(struct sock *parent);
>> +int vsock_bind_stream(struct vsock_sock *vsk,
>> +  struct sockaddr_vm *addr);
>>  
>>  / TRANSPORT /
>>  
>> diff --git a/include/uapi/linux/virtio_vsock.h 
>> b/include/uapi/linux/virtio_vsock.h
>> index 857df3a3a70d..0975b9c88292 100644
>> --- a/include/uapi/linux/virtio_vsock.h
>> +++ b/include/uapi/linux/virtio_vsock.h
>> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
>>  enum virtio_vsock_type {
>>  VIRTIO_VSOCK_TYPE_STREAM = 1,
>>  VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
>> +VIRTIO_VSOCK_TYPE_DGRAM = 3,
>>  };
>>  
>>  enum virtio_vsock_op {
>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>> index 1893f8aafa48..87e4ae1866d3 100644
>> --- a/net/vmw_vsock/af_vsock.c
>> +++ b/net/vmw_vsock/af_vsock.c
>> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock 
>> *vsk,
>>  return 0;
>>  }
>>  
>> +int vsock_bind_stream(struct vsock_sock *vsk,
>> +  struct sockaddr_vm *addr)
>> +{
>> +int retval;
>> +
>> +spin_lock_bh(_table_lock);
>> +retval = __vsock_bind_connectible(vsk, addr);
>> +spin_unlock_bh(_table_lock);
>> +
>> +return retval;
>> +}
>> +EXPORT_SYMBOL(vsock_bind_stream);
>> +
>>  static int __vsock_bind_dgram(struct vsock_sock *vsk,
>>struct sockaddr_vm *addr)
>>  {
>> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport 
>> *t, int features)
>>  }
>>  
>>  if (features & VSOCK_TRANSPORT_F_DGRAM) {
>> -if (t_dgram) {
>> -err = -EBUSY;
>> -goto err_busy;
>> +/* TODO: always chose the G2H variant over others, support 
>> nesting later */
>> +if (features & VSOCK_TRANSPORT_F_G2H) {
>> +if (t_dgram)
>> +pr_warn("virtio_vsock: t_dgram already set\n");
>> +t_dgram = t;
>> +}
>> +
>> +if (!t_dgram) {
>> +t_dgram = t;
>>  }
>> -t_dgram = t;
>>  }
>>  
>>  if (features & VSOCK_TRANSPORT_F_LOCAL) {
>> diff --git a/net/vmw_vsock/virtio_transport.c 
>> b/net/vmw_vsock/virtio_transport.c
>> index 073314312683..d4526ca462d2 100644
>> --- a/net/vmw_vsock/virtio_transport.c
>> +++ b/net/vmw_vsock/virtio_transport.c
>> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
>>  return -ENOMEM;
>>  
>>  ret = vsock_core_register(_transport.transport,
>> -  VSOCK_TRANSPORT_F_G2H);
>> +  VSOCK_TRANSPORT_F_G2H | 
>> VSOCK_TRANSPORT_F_DGRAM);
>>  if (ret)
>>  goto out_wq;
>>  
>> diff --git a/net/vmw_vsock/virtio_transport_common.c 
>> b/net/vmw_vsock/virtio_transport_common.c
>> index bdf16fff054f..aedb48728677 100644
>> --- a/net/vmw_vsock/virtio_transport_common.c
>> +++ b/net/vmw_vsock/virtio_transport_common.c
>> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
>>  
>>  static u16 virtio_transport_get_type(struct sock *sk)
>>  {
>> -if (sk->sk_type == SOCK_STREAM)
>> +if (sk->sk_type == SOCK_DGRAM)
>> +return VIRTIO_VSOCK_TYPE_DGRAM;
>> +else if (sk->sk_type == SOCK_STREAM)
>>  return VIRTIO_VSOCK_TYPE_STREAM;

Re: [virtio-dev] [RFC PATCH v3] virtio-blk: add zoned block device specification

2022-08-16 Thread Dmitry Fomichev
On Sat, 2022-08-06 at 17:20 -0400, Stefan Hajnoczi wrote:
> On Thu, 4 Aug 2022 at 18:41, Dmitry Fomichev  wrote:
> 
> Hi Dmitry,
> I think RFC can be removed from the Subject line for the next revision
> of this patch.
> 
> For instructions on how to bring this to a Technical Committee vote, see:
> https://github.com/oasis-tcs/virtio-spec#use-of-github-issues
> 
> > 
> > Introduce support for Zoned Block Devices to virtio.
> > 
> > Zoned Block Devices (ZBDs) aim to achieve a better capacity, latency
> > and/or cost characteristics compared to commonly available block
> > devices by getting the entire LBA space of the device divided to block
> > regions that are much larger than the LBA size. These regions are
> > called zones and they can only be written sequentially. More details
> > about ZBDs can be found at
> > 
> > https://zonedstorage.io/docs/introduction/zoned-storage .
> > 
> > In its current form, the virtio protocol for block devices (virtio-blk)
> > is not aware of ZBDs but it allows the guest to successfully scan a
> > host-managed drive provided by the host. As the result, the
> > host-managed drive appears at the guest as a regular drive that will
> > operate erroneously under the most common write workloads.
> > 
> > To fix this, the virtio-blk protocol needs to be extended to add the
> > capabilities to convey the zone characteristics of host ZBDs to the
> > guest and to provide the support for ZBD-specific commands - Report
> > Zones, four zone operations and (optionally) Zone Append. The proposed
> > standard extension aims to provide this functionality.
> > 
> > This patch extends the virtio-blk section of virtio specification with
> > the minimum set of requirements that are necessary to support ZBDs.
> > The resulting device model is a subset of the models defined in ZAC/ZBC
> > and ZNS standards documents. The included functionality mirrors
> > the existing Linux kernel block layer ZBD support and should be
> > sufficient to handle the host-managed and host-aware HDDs that are on
> > the market today as well as ZNS SSDs that are entering the market at
> > the moment of this patch submission.
> > 
> > I have developed a proof of concept patch series that adds ZBD support
> > to virtio-blk Linux kernel driver by implementing the protocol
> > extensions defined in the spec patch. I would like to receive feedback
> > on this specification patch before posting that series to the block
> > LKML.
> > 
> > I would like to thank the following people for their useful feedback
> > and suggestions while working on the initial iterations of this patch.
> > 
> > Damien Le Moal 
> > Matias Bjørling 
> > Niklas Cassel 
> > Hans Holmberg 
> > 
> 



> 
> > +VIRTIO_BLK_T_ZONE_OPEN, VIRTIO_BLK_T_ZONE_CLOSE, VIRTIO_BLK_T_ZONE_FINISH
> > and
> > +VIRTIO_BLK_T_ZONE_RESET requests make the zone operation to act on a
> > particular
> > +zone specified by the zone sector address in the \field{sector} of the
> > request.
> > +The zone sector address is a 64-bit value expressed in 512-byte units that
> > +points at the first sector in the target zone.
> 
> Normally the struct virtio_blk_req sector field is expressed in bytes.
> If I'm interpreting this text correctly the zone management commands
> change the field's meaning to sector units?
> 

These should be in bytes, I'll make sure it is clear.

> > +
> > +VIRTIO_BLK_T_ZONE_RESET_ALL request acts upon all applicable zones of the
> > +device. For this request, the driver MUST set the field \field{sector} to
> > zero
> > +value.
> > +
> > +The \field{sector} field of the VIRTIO_BLK_T_ZONE_APPEND request MUST
> > specify
> > +the first sector of the zone to which data is to be appended at the 
> > position
> > of
> > +the write pointer. The zone sector address is a 64-bit value expressed in
> > +512-byte units that points anywhere in the target zone. The size of the 
> > data
> 
> This "512-byte units" wording confuses me. I think the units should be
> bytes and the text should say "in multiples of 512 bytes".

The wording about "512-byte units" is actually present in the spec in the part
that is related to discard. I do agree that it would be better to stick with the
way the units are described for read/write requests, will reword.

> 
> > +that is appended MUST NOT exceed the \field{max_append_sectors} value
> > provided
> > +by the device in \field{virtio_blk_zoned_characteristics} configuration
> > space
> > +structure.
> > +
> > +Upon a successful completion of a VIRTIO_BLK_T_ZONE_APPEND request, the
> > driver
> > +SHOULD read the starting sector location of the written data from the
> > request field
> > +\field{append_sector}.
> 
> Maybe that depends on the application? If the application blindly
> appends and doesn't use the write pointer then there's no need to read
> append_sector. I think this statement can be removed and instead the
> non-normative section can describe the purpose of the append_sector
> field so that driver authors know the new 

[virtio-dev] Re: [PATCH 6/6] vsock_test: add tests for vsock dgram

2022-08-16 Thread Bobby Eshleman
CC'ing virtio-dev@lists.oasis-open.org

On Mon, Aug 15, 2022 at 10:56:09AM -0700, Bobby Eshleman wrote:
> From: Jiang Wang 
> 
> Added test cases for vsock dgram types.
> 
> Signed-off-by: Jiang Wang 
> ---
>  tools/testing/vsock/util.c   | 105 +
>  tools/testing/vsock/util.h   |   4 +
>  tools/testing/vsock/vsock_test.c | 195 +++
>  3 files changed, 304 insertions(+)
> 
> diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
> index 2acbb7703c6a..d2f5b223bf85 100644
> --- a/tools/testing/vsock/util.c
> +++ b/tools/testing/vsock/util.c
> @@ -260,6 +260,57 @@ void send_byte(int fd, int expected_ret, int flags)
>   }
>  }
>  
> +/* Transmit one byte and check the return value.
> + *
> + * expected_ret:
> + *  <0 Negative errno (for testing errors)
> + *   0 End-of-file
> + *   1 Success
> + */
> +void sendto_byte(int fd, const struct sockaddr *dest_addr, int len, int 
> expected_ret,
> + int flags)
> +{
> + const uint8_t byte = 'A';
> + ssize_t nwritten;
> +
> + timeout_begin(TIMEOUT);
> + do {
> + nwritten = sendto(fd, , sizeof(byte), flags, dest_addr,
> + len);
> + timeout_check("write");
> + } while (nwritten < 0 && errno == EINTR);
> + timeout_end();
> +
> + if (expected_ret < 0) {
> + if (nwritten != -1) {
> + fprintf(stderr, "bogus sendto(2) return value %zd\n",
> + nwritten);
> + exit(EXIT_FAILURE);
> + }
> + if (errno != -expected_ret) {
> + perror("write");
> + exit(EXIT_FAILURE);
> + }
> + return;
> + }
> +
> + if (nwritten < 0) {
> + perror("write");
> + exit(EXIT_FAILURE);
> + }
> + if (nwritten == 0) {
> + if (expected_ret == 0)
> + return;
> +
> + fprintf(stderr, "unexpected EOF while sending byte\n");
> + exit(EXIT_FAILURE);
> + }
> + if (nwritten != sizeof(byte)) {
> + fprintf(stderr, "bogus sendto(2) return value %zd\n", nwritten);
> + exit(EXIT_FAILURE);
> + }
> +}
> +
>  /* Receive one byte and check the return value.
>   *
>   * expected_ret:
> @@ -313,6 +364,60 @@ void recv_byte(int fd, int expected_ret, int flags)
>   }
>  }
>  
> +/* Receive one byte and check the return value.
> + *
> + * expected_ret:
> + *  <0 Negative errno (for testing errors)
> + *   0 End-of-file
> + *   1 Success
> + */
> +void recvfrom_byte(int fd, struct sockaddr *src_addr, socklen_t *addrlen,
> + int expected_ret, int flags)
> +{
> + uint8_t byte;
> + ssize_t nread;
> +
> + timeout_begin(TIMEOUT);
> + do {
> + nread = recvfrom(fd, , sizeof(byte), flags, src_addr, 
> addrlen);
> + timeout_check("read");
> + } while (nread < 0 && errno == EINTR);
> + timeout_end();
> +
> + if (expected_ret < 0) {
> + if (nread != -1) {
> + fprintf(stderr, "bogus recvfrom(2) return value %zd\n",
> + nread);
> + exit(EXIT_FAILURE);
> + }
> + if (errno != -expected_ret) {
> + perror("read");
> + exit(EXIT_FAILURE);
> + }
> + return;
> + }
> +
> + if (nread < 0) {
> + perror("read");
> + exit(EXIT_FAILURE);
> + }
> + if (nread == 0) {
> + if (expected_ret == 0)
> + return;
> +
> + fprintf(stderr, "unexpected EOF while receiving byte\n");
> + exit(EXIT_FAILURE);
> + }
> + if (nread != sizeof(byte)) {
> + fprintf(stderr, "bogus recvfrom(2) return value %zd\n", nread);
> + exit(EXIT_FAILURE);
> + }
> + if (byte != 'A') {
> + fprintf(stderr, "unexpected byte read %c\n", byte);
> + exit(EXIT_FAILURE);
> + }
> +}
> +
>  /* Run test cases.  The program terminates if a failure occurs. */
>  void run_tests(const struct test_case *test_cases,
>  const struct test_opts *opts)
> diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
> index a3375ad2fb7f..7213f2a51c1e 100644
> --- a/tools/testing/vsock/util.h
> +++ b/tools/testing/vsock/util.h
> @@ -43,7 +43,11 @@ int vsock_seqpacket_accept(unsigned int cid, unsigned int 
> port,
>  struct sockaddr_vm *clientaddrp);
>  void vsock_wait_remote_close(int fd);
>  void send_byte(int fd, int expected_ret, int flags);
> +void sendto_byte(int fd, const struct sockaddr *dest_addr, int len, int 
> expected_ret,
> + int flags);
>  void recv_byte(int fd, int expected_ret, int flags);
> +void recvfrom_byte(int fd, struct sockaddr 

[virtio-dev] Re: [PATCH 5/6] virtio/vsock: add support for dgram

2022-08-16 Thread Bobby Eshleman
CC'ing virtio-dev@lists.oasis-open.org

On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
> This patch supports dgram in virtio and on the vhost side.
> 
> Signed-off-by: Jiang Wang 
> Signed-off-by: Bobby Eshleman 
> ---
>  drivers/vhost/vsock.c   |   2 +-
>  include/net/af_vsock.h  |   2 +
>  include/uapi/linux/virtio_vsock.h   |   1 +
>  net/vmw_vsock/af_vsock.c|  26 +++-
>  net/vmw_vsock/virtio_transport.c|   2 +-
>  net/vmw_vsock/virtio_transport_common.c | 173 ++--
>  6 files changed, 186 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index a5d1bdb786fe..3dc72a5647ca 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
>   int ret;
>  
>   ret = vsock_core_register(_transport.transport,
> -   VSOCK_TRANSPORT_F_H2G);
> +   VSOCK_TRANSPORT_F_H2G | 
> VSOCK_TRANSPORT_F_DGRAM);
>   if (ret < 0)
>   return ret;
>  
> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> index 1c53c4c4d88f..37e55c81e4df 100644
> --- a/include/net/af_vsock.h
> +++ b/include/net/af_vsock.h
> @@ -78,6 +78,8 @@ struct vsock_sock {
>  s64 vsock_stream_has_data(struct vsock_sock *vsk);
>  s64 vsock_stream_has_space(struct vsock_sock *vsk);
>  struct sock *vsock_create_connected(struct sock *parent);
> +int vsock_bind_stream(struct vsock_sock *vsk,
> +   struct sockaddr_vm *addr);
>  
>  / TRANSPORT /
>  
> diff --git a/include/uapi/linux/virtio_vsock.h 
> b/include/uapi/linux/virtio_vsock.h
> index 857df3a3a70d..0975b9c88292 100644
> --- a/include/uapi/linux/virtio_vsock.h
> +++ b/include/uapi/linux/virtio_vsock.h
> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
>  enum virtio_vsock_type {
>   VIRTIO_VSOCK_TYPE_STREAM = 1,
>   VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> + VIRTIO_VSOCK_TYPE_DGRAM = 3,
>  };
>  
>  enum virtio_vsock_op {
> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> index 1893f8aafa48..87e4ae1866d3 100644
> --- a/net/vmw_vsock/af_vsock.c
> +++ b/net/vmw_vsock/af_vsock.c
> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock 
> *vsk,
>   return 0;
>  }
>  
> +int vsock_bind_stream(struct vsock_sock *vsk,
> +   struct sockaddr_vm *addr)
> +{
> + int retval;
> +
> + spin_lock_bh(_table_lock);
> + retval = __vsock_bind_connectible(vsk, addr);
> + spin_unlock_bh(_table_lock);
> +
> + return retval;
> +}
> +EXPORT_SYMBOL(vsock_bind_stream);
> +
>  static int __vsock_bind_dgram(struct vsock_sock *vsk,
> struct sockaddr_vm *addr)
>  {
> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct vsock_transport 
> *t, int features)
>   }
>  
>   if (features & VSOCK_TRANSPORT_F_DGRAM) {
> - if (t_dgram) {
> - err = -EBUSY;
> - goto err_busy;
> + /* TODO: always chose the G2H variant over others, support 
> nesting later */
> + if (features & VSOCK_TRANSPORT_F_G2H) {
> + if (t_dgram)
> + pr_warn("virtio_vsock: t_dgram already set\n");
> + t_dgram = t;
> + }
> +
> + if (!t_dgram) {
> + t_dgram = t;
>   }
> - t_dgram = t;
>   }
>  
>   if (features & VSOCK_TRANSPORT_F_LOCAL) {
> diff --git a/net/vmw_vsock/virtio_transport.c 
> b/net/vmw_vsock/virtio_transport.c
> index 073314312683..d4526ca462d2 100644
> --- a/net/vmw_vsock/virtio_transport.c
> +++ b/net/vmw_vsock/virtio_transport.c
> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
>   return -ENOMEM;
>  
>   ret = vsock_core_register(_transport.transport,
> -   VSOCK_TRANSPORT_F_G2H);
> +   VSOCK_TRANSPORT_F_G2H | 
> VSOCK_TRANSPORT_F_DGRAM);
>   if (ret)
>   goto out_wq;
>  
> diff --git a/net/vmw_vsock/virtio_transport_common.c 
> b/net/vmw_vsock/virtio_transport_common.c
> index bdf16fff054f..aedb48728677 100644
> --- a/net/vmw_vsock/virtio_transport_common.c
> +++ b/net/vmw_vsock/virtio_transport_common.c
> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
>  
>  static u16 virtio_transport_get_type(struct sock *sk)
>  {
> - if (sk->sk_type == SOCK_STREAM)
> + if (sk->sk_type == SOCK_DGRAM)
> + return VIRTIO_VSOCK_TYPE_DGRAM;
> + else if (sk->sk_type == SOCK_STREAM)
>   return VIRTIO_VSOCK_TYPE_STREAM;
>   else
>   return VIRTIO_VSOCK_TYPE_SEQPACKET;
> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct 
> vsock_sock *vsk,
>   vvs = vsk->trans;
>  
>   /* we can send less than pkt_len bytes */
> - if 

[virtio-dev] Re: [PATCH 4/6] virtio/vsock: add VIRTIO_VSOCK_F_DGRAM feature bit

2022-08-16 Thread Bobby Eshleman
CC'ing virtio-dev@lists.oasis-open.org

On Mon, Aug 15, 2022 at 10:56:07AM -0700, Bobby Eshleman wrote:
> This commit adds a feature bit for virtio vsock to support datagrams.
> 
> Signed-off-by: Jiang Wang 
> Signed-off-by: Bobby Eshleman 
> ---
>  drivers/vhost/vsock.c | 3 ++-
>  include/uapi/linux/virtio_vsock.h | 1 +
>  net/vmw_vsock/virtio_transport.c  | 8 ++--
>  3 files changed, 9 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index b20ddec2664b..a5d1bdb786fe 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -32,7 +32,8 @@
>  enum {
>   VHOST_VSOCK_FEATURES = VHOST_FEATURES |
>  (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
> -(1ULL << VIRTIO_VSOCK_F_SEQPACKET)
> +(1ULL << VIRTIO_VSOCK_F_SEQPACKET) |
> +(1ULL << VIRTIO_VSOCK_F_DGRAM)
>  };
>  
>  enum {
> diff --git a/include/uapi/linux/virtio_vsock.h 
> b/include/uapi/linux/virtio_vsock.h
> index 64738838bee5..857df3a3a70d 100644
> --- a/include/uapi/linux/virtio_vsock.h
> +++ b/include/uapi/linux/virtio_vsock.h
> @@ -40,6 +40,7 @@
>  
>  /* The feature bitmap for virtio vsock */
>  #define VIRTIO_VSOCK_F_SEQPACKET 1   /* SOCK_SEQPACKET supported */
> +#define VIRTIO_VSOCK_F_DGRAM 2   /* Host support dgram vsock */
>  
>  struct virtio_vsock_config {
>   __le64 guest_cid;
> diff --git a/net/vmw_vsock/virtio_transport.c 
> b/net/vmw_vsock/virtio_transport.c
> index c6212eb38d3c..073314312683 100644
> --- a/net/vmw_vsock/virtio_transport.c
> +++ b/net/vmw_vsock/virtio_transport.c
> @@ -35,6 +35,7 @@ static struct virtio_transport virtio_transport; /* forward 
> declaration */
>  struct virtio_vsock {
>   struct virtio_device *vdev;
>   struct virtqueue *vqs[VSOCK_VQ_MAX];
> + bool has_dgram;
>  
>   /* Virtqueue processing is deferred to a workqueue */
>   struct work_struct tx_work;
> @@ -709,7 +710,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
>   }
>  
>   vsock->vdev = vdev;
> -
>   vsock->rx_buf_nr = 0;
>   vsock->rx_buf_max_nr = 0;
>   atomic_set(>queued_replies, 0);
> @@ -726,6 +726,9 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
>   if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
>   vsock->seqpacket_allow = true;
>  
> + if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_DGRAM))
> + vsock->has_dgram = true;
> +
>   vdev->priv = vsock;
>  
>   ret = virtio_vsock_vqs_init(vsock);
> @@ -820,7 +823,8 @@ static struct virtio_device_id id_table[] = {
>  };
>  
>  static unsigned int features[] = {
> - VIRTIO_VSOCK_F_SEQPACKET
> + VIRTIO_VSOCK_F_SEQPACKET,
> + VIRTIO_VSOCK_F_DGRAM
>  };
>  
>  static struct virtio_driver virtio_vsock_driver = {
> -- 
> 2.35.1
> 

-
To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org



[virtio-dev] Re: [PATCH 3/6] vsock: add netdev to vhost/virtio vsock

2022-08-16 Thread Bobby Eshleman
CC'ing virtio-dev@lists.oasis-open.org

On Mon, Aug 15, 2022 at 10:56:06AM -0700, Bobby Eshleman wrote:
> In order to support usage of qdisc on vsock traffic, this commit
> introduces a struct net_device to vhost and virtio vsock.
> 
> Two new devices are created, vhost-vsock for vhost and virtio-vsock
> for virtio. The devices are attached to the respective transports.
> 
> To bypass the usage of the device, the user may "down" the associated
> network interface using common tools. For example, "ip link set dev
> virtio-vsock down" lets vsock bypass the net_device and qdisc entirely,
> simply using the FIFO logic of the prior implementation.
> 
> For both hosts and guests, there is one device for all G2H vsock sockets
> and one device for all H2G vsock sockets. This makes sense for guests
> because the driver only supports a single vsock channel (one pair of
> TX/RX virtqueues), so one device and qdisc fits. For hosts, this may not
> seem ideal for some workloads. However, it is possible to use a
> multi-queue qdisc, where a given queue is responsible for a range of
> sockets. This seems to be a better solution than having one device per
> socket, which may yield a very large number of devices and qdiscs, all
> of which are dynamically being created and destroyed. Because of this
> dynamism, it would also require a complex policy management daemon, as
> devices would constantly be spun up and down as sockets were created and
> destroyed. To avoid this, one device and qdisc also applies to all H2G
> sockets.
> 
> Signed-off-by: Bobby Eshleman 
> ---
>  drivers/vhost/vsock.c   |  19 +++-
>  include/linux/virtio_vsock.h|  10 +++
>  net/vmw_vsock/virtio_transport.c|  19 +++-
>  net/vmw_vsock/virtio_transport_common.c | 112 +++-
>  4 files changed, 152 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index f8601d93d94d..b20ddec2664b 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -927,13 +927,30 @@ static int __init vhost_vsock_init(void)
> VSOCK_TRANSPORT_F_H2G);
>   if (ret < 0)
>   return ret;
> - return misc_register(_vsock_misc);
> +
> + ret = virtio_transport_init(_transport, "vhost-vsock");
> + if (ret < 0)
> + goto out_unregister;
> +
> + ret = misc_register(_vsock_misc);
> + if (ret < 0)
> + goto out_transport_exit;
> + return ret;
> +
> +out_transport_exit:
> + virtio_transport_exit(_transport);
> +
> +out_unregister:
> + vsock_core_unregister(_transport.transport);
> + return ret;
> +
>  };
>  
>  static void __exit vhost_vsock_exit(void)
>  {
>   misc_deregister(_vsock_misc);
>   vsock_core_unregister(_transport.transport);
> + virtio_transport_exit(_transport);
>  };
>  
>  module_init(vhost_vsock_init);
> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> index 9a37eddbb87a..5d7e7fbd75f8 100644
> --- a/include/linux/virtio_vsock.h
> +++ b/include/linux/virtio_vsock.h
> @@ -91,10 +91,20 @@ struct virtio_transport {
>   /* This must be the first field */
>   struct vsock_transport transport;
>  
> + /* Used almost exclusively for qdisc */
> + struct net_device *dev;
> +
>   /* Takes ownership of the packet */
>   int (*send_pkt)(struct sk_buff *skb);
>  };
>  
> +int
> +virtio_transport_init(struct virtio_transport *t,
> +   const char *name);
> +
> +void
> +virtio_transport_exit(struct virtio_transport *t);
> +
>  ssize_t
>  virtio_transport_stream_dequeue(struct vsock_sock *vsk,
>   struct msghdr *msg,
> diff --git a/net/vmw_vsock/virtio_transport.c 
> b/net/vmw_vsock/virtio_transport.c
> index 3bb293fd8607..c6212eb38d3c 100644
> --- a/net/vmw_vsock/virtio_transport.c
> +++ b/net/vmw_vsock/virtio_transport.c
> @@ -131,7 +131,9 @@ virtio_transport_send_pkt_work(struct work_struct *work)
>* the vq
>*/
>   if (ret < 0) {
> - skb_queue_head(>send_pkt_queue, skb);
> + spin_lock_bh(>send_pkt_queue.lock);
> + __skb_queue_head(>send_pkt_queue, skb);
> + spin_unlock_bh(>send_pkt_queue.lock);
>   break;
>   }
>  
> @@ -676,7 +678,9 @@ static void virtio_vsock_vqs_del(struct virtio_vsock 
> *vsock)
>   kfree_skb(skb);
>   mutex_unlock(>tx_lock);
>  
> - skb_queue_purge(>send_pkt_queue);
> + spin_lock_bh(>send_pkt_queue.lock);
> + __skb_queue_purge(>send_pkt_queue);
> + spin_unlock_bh(>send_pkt_queue.lock);
>  
>   /* Delete virtqueues and flush outstanding callbacks if any */
>   vdev->config->del_vqs(vdev);
> @@ -760,6 +764,8 @@ static void virtio_vsock_remove(struct virtio_device 
> *vdev)
>   flush_work(>event_work);
>   flush_work(>send_pkt_work);
>  
> + 

[virtio-dev] Re: [PATCH 2/6] vsock: return errors other than -ENOMEM to socket

2022-08-16 Thread Bobby Eshleman
CC'ing virtio-dev@lists.oasis-open.org

On Mon, Aug 15, 2022 at 10:56:05AM -0700, Bobby Eshleman wrote:
> This commit allows vsock implementations to return errors
> to the socket layer other than -ENOMEM. One immediate effect
> of this is that upon the sk_sndbuf threshold being reached -EAGAIN
> will be returned and userspace may throttle appropriately.
> 
> Resultingly, a known issue with uperf is resolved[1].
> 
> Additionally, to preserve legacy behavior for non-virtio
> implementations, hyperv/vmci force errors to be -ENOMEM so that behavior
> is unchanged.
> 
> [1]: https://gitlab.com/vsock/vsock/-/issues/1
> 
> Signed-off-by: Bobby Eshleman 
> ---
>  include/linux/virtio_vsock.h| 3 +++
>  net/vmw_vsock/af_vsock.c| 3 ++-
>  net/vmw_vsock/hyperv_transport.c| 2 +-
>  net/vmw_vsock/virtio_transport_common.c | 3 ---
>  net/vmw_vsock/vmci_transport.c  | 9 -
>  5 files changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> index 17ed01466875..9a37eddbb87a 100644
> --- a/include/linux/virtio_vsock.h
> +++ b/include/linux/virtio_vsock.h
> @@ -8,6 +8,9 @@
>  #include 
>  #include 
>  
> +/* Threshold for detecting small packets to copy */
> +#define GOOD_COPY_LEN  128
> +
>  enum virtio_vsock_metadata_flags {
>   VIRTIO_VSOCK_METADATA_FLAGS_REPLY   = BIT(0),
>   VIRTIO_VSOCK_METADATA_FLAGS_TAP_DELIVERED   = BIT(1),
> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> index e348b2d09eac..1893f8aafa48 100644
> --- a/net/vmw_vsock/af_vsock.c
> +++ b/net/vmw_vsock/af_vsock.c
> @@ -1844,8 +1844,9 @@ static int vsock_connectible_sendmsg(struct socket 
> *sock, struct msghdr *msg,
>   written = transport->stream_enqueue(vsk,
>   msg, len - total_written);
>   }
> +
>   if (written < 0) {
> - err = -ENOMEM;
> + err = written;
>   goto out_err;
>   }
>  
> diff --git a/net/vmw_vsock/hyperv_transport.c 
> b/net/vmw_vsock/hyperv_transport.c
> index fd98229e3db3..e99aea571f6f 100644
> --- a/net/vmw_vsock/hyperv_transport.c
> +++ b/net/vmw_vsock/hyperv_transport.c
> @@ -687,7 +687,7 @@ static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, 
> struct msghdr *msg,
>   if (bytes_written)
>   ret = bytes_written;
>   kfree(send_buf);
> - return ret;
> + return ret < 0 ? -ENOMEM : ret;
>  }
>  
>  static s64 hvs_stream_has_data(struct vsock_sock *vsk)
> diff --git a/net/vmw_vsock/virtio_transport_common.c 
> b/net/vmw_vsock/virtio_transport_common.c
> index 920578597bb9..d5780599fe93 100644
> --- a/net/vmw_vsock/virtio_transport_common.c
> +++ b/net/vmw_vsock/virtio_transport_common.c
> @@ -23,9 +23,6 @@
>  /* How long to wait for graceful shutdown of a connection */
>  #define VSOCK_CLOSE_TIMEOUT (8 * HZ)
>  
> -/* Threshold for detecting small packets to copy */
> -#define GOOD_COPY_LEN  128
> -
>  static const struct virtio_transport *
>  virtio_transport_get_ops(struct vsock_sock *vsk)
>  {
> diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
> index b14f0ed7427b..c927a90dc859 100644
> --- a/net/vmw_vsock/vmci_transport.c
> +++ b/net/vmw_vsock/vmci_transport.c
> @@ -1838,7 +1838,14 @@ static ssize_t vmci_transport_stream_enqueue(
>   struct msghdr *msg,
>   size_t len)
>  {
> - return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0);
> + int err;
> +
> + err = vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0);
> +
> + if (err < 0)
> + err = -ENOMEM;
> +
> + return err;
>  }
>  
>  static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk)
> -- 
> 2.35.1
> 

-
To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org



[virtio-dev] Re: [PATCH 1/6] vsock: replace virtio_vsock_pkt with sk_buff

2022-08-16 Thread Bobby Eshleman
CC'ing virtio-dev@lists.oasis-open.org

On Mon, Aug 15, 2022 at 10:56:04AM -0700, Bobby Eshleman wrote:
> This patch replaces virtio_vsock_pkt with sk_buff.
> 
> The benefit of this series includes:
> 
> * The bug reported @ https://bugzilla.redhat.com/show_bug.cgi?id=2009935
>   does not present itself when reasonable sk_sndbuf thresholds are set.
> * Using sock_alloc_send_skb() teaches VSOCK to respect
>   sk_sndbuf for tunability.
> * Eliminates copying for vsock_deliver_tap().
> * sk_buff is required for future improvements, such as using socket map.
> 
> Signed-off-by: Bobby Eshleman 
> ---
>  drivers/vhost/vsock.c   | 214 +--
>  include/linux/virtio_vsock.h|  60 ++-
>  net/vmw_vsock/af_vsock.c|   1 +
>  net/vmw_vsock/virtio_transport.c| 212 +-
>  net/vmw_vsock/virtio_transport_common.c | 491 
>  net/vmw_vsock/vsock_loopback.c  |  51 +--
>  6 files changed, 517 insertions(+), 512 deletions(-)
> 
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index 368330417bde..f8601d93d94d 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -51,8 +51,7 @@ struct vhost_vsock {
>   struct hlist_node hash;
>  
>   struct vhost_work send_pkt_work;
> - spinlock_t send_pkt_list_lock;
> - struct list_head send_pkt_list; /* host->guest pending packets */
> + struct sk_buff_head send_pkt_queue; /* host->guest pending packets */
>  
>   atomic_t queued_replies;
>  
> @@ -108,7 +107,8 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
>   vhost_disable_notify(>dev, vq);
>  
>   do {
> - struct virtio_vsock_pkt *pkt;
> + struct sk_buff *skb;
> + struct virtio_vsock_hdr *hdr;
>   struct iov_iter iov_iter;
>   unsigned out, in;
>   size_t nbytes;
> @@ -116,31 +116,22 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
>   int head;
>   u32 flags_to_restore = 0;
>  
> - spin_lock_bh(>send_pkt_list_lock);
> - if (list_empty(>send_pkt_list)) {
> - spin_unlock_bh(>send_pkt_list_lock);
> + skb = skb_dequeue(>send_pkt_queue);
> +
> + if (!skb) {
>   vhost_enable_notify(>dev, vq);
>   break;
>   }
>  
> - pkt = list_first_entry(>send_pkt_list,
> -struct virtio_vsock_pkt, list);
> - list_del_init(>list);
> - spin_unlock_bh(>send_pkt_list_lock);
> -
>   head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
>, , NULL, NULL);
>   if (head < 0) {
> - spin_lock_bh(>send_pkt_list_lock);
> - list_add(>list, >send_pkt_list);
> - spin_unlock_bh(>send_pkt_list_lock);
> + skb_queue_head(>send_pkt_queue, skb);
>   break;
>   }
>  
>   if (head == vq->num) {
> - spin_lock_bh(>send_pkt_list_lock);
> - list_add(>list, >send_pkt_list);
> - spin_unlock_bh(>send_pkt_list_lock);
> + skb_queue_head(>send_pkt_queue, skb);
>  
>   /* We cannot finish yet if more buffers snuck in while
>* re-enabling notify.
> @@ -153,26 +144,27 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
>   }
>  
>   if (out) {
> - virtio_transport_free_pkt(pkt);
> + kfree_skb(skb);
>   vq_err(vq, "Expected 0 output buffers, got %u\n", out);
>   break;
>   }
>  
>   iov_len = iov_length(>iov[out], in);
> - if (iov_len < sizeof(pkt->hdr)) {
> - virtio_transport_free_pkt(pkt);
> + if (iov_len < sizeof(*hdr)) {
> + kfree_skb(skb);
>   vq_err(vq, "Buffer len [%zu] too small\n", iov_len);
>   break;
>   }
>  
>   iov_iter_init(_iter, READ, >iov[out], in, iov_len);
> - payload_len = pkt->len - pkt->off;
> + payload_len = skb->len - vsock_metadata(skb)->off;
> + hdr = vsock_hdr(skb);
>  
>   /* If the packet is greater than the space available in the
>* buffer, we split it using multiple buffers.
>*/
> - if (payload_len > iov_len - sizeof(pkt->hdr)) {
> - payload_len = iov_len - sizeof(pkt->hdr);
> + if (payload_len > iov_len - sizeof(*hdr)) {
> + payload_len = iov_len - sizeof(*hdr);
>  
>   /* As we are copying pieces of large packet's buffer to
>* small rx buffers, headers of 

[virtio-dev] Re: [PATCH 0/6] virtio/vsock: introduce dgrams, sk_buff, and qdisc

2022-08-16 Thread Bobby Eshleman
CC'ing virtio-dev@lists.oasis-open.org

On Mon, Aug 15, 2022 at 10:56:03AM -0700, Bobby Eshleman wrote:
> Hey everybody,
> 
> This series introduces datagrams, packet scheduling, and sk_buff usage
> to virtio vsock.
> 
> The usage of struct sk_buff benefits users by a) preparing vsock to use
> other related systems that require sk_buff, such as sockmap and qdisc,
> b) supporting basic congestion control via sock_alloc_send_skb, and c)
> reducing copying when delivering packets to TAP.
> 
> The socket layer no longer forces errors to be -ENOMEM, as typically
> userspace expects -EAGAIN when the sk_sndbuf threshold is reached and
> messages are being sent with option MSG_DONTWAIT.
> 
> The datagram work is based off previous patches by Jiang Wang[1].
> 
> The introduction of datagrams creates a transport layer fairness issue
> where datagrams may freely starve streams of queue access. This happens
> because, unlike streams, datagrams lack the transactions necessary for
> calculating credits and throttling.
> 
> Previous proposals introduce changes to the spec to add an additional
> virtqueue pair for datagrams[1]. Although this solution works, using
> Linux's qdisc for packet scheduling leverages already existing systems,
> avoids the need to change the virtio specification, and gives additional
> capabilities. The usage of SFQ or fq_codel, for example, may solve the
> transport layer starvation problem. It is easy to imagine other use
> cases as well. For example, services of varying importance may be
> assigned different priorities, and qdisc will apply appropriate
> priority-based scheduling. By default, the system default pfifo qdisc is
> used. The qdisc may be bypassed and legacy queuing is resumed by simply
> setting the virtio-vsock%d network device to state DOWN. This technique
> still allows vsock to work with zero-configuration.
> 
> In summary, this series introduces these major changes to vsock:
> 
> - virtio vsock supports datagrams
> - virtio vsock uses struct sk_buff instead of virtio_vsock_pkt
>   - Because virtio vsock uses sk_buff, it also uses sock_alloc_send_skb,
> which applies the throttling threshold sk_sndbuf.
> - The vsock socket layer supports returning errors other than -ENOMEM.
>   - This is used to return -EAGAIN when the sk_sndbuf threshold is
> reached.
> - virtio vsock uses a net_device, through which qdisc may be used.
>  - qdisc allows scheduling policies to be applied to vsock flows.
>   - Some qdiscs, like SFQ, may allow vsock to avoid transport layer 
> congestion. That is,
> it may avoid datagrams from flooding out stream flows. The benefit
> to this is that additional virtqueues are not needed for datagrams.
>   - The net_device and qdisc is bypassed by simply setting the
> net_device state to DOWN.
> 
> [1]: 
> https://lore.kernel.org/all/20210914055440.3121004-1-jiang.w...@bytedance.com/
> 
> Bobby Eshleman (5):
>   vsock: replace virtio_vsock_pkt with sk_buff
>   vsock: return errors other than -ENOMEM to socket
>   vsock: add netdev to vhost/virtio vsock
>   virtio/vsock: add VIRTIO_VSOCK_F_DGRAM feature bit
>   virtio/vsock: add support for dgram
> 
> Jiang Wang (1):
>   vsock_test: add tests for vsock dgram
> 
>  drivers/vhost/vsock.c   | 238 
>  include/linux/virtio_vsock.h|  73 ++-
>  include/net/af_vsock.h  |   2 +
>  include/uapi/linux/virtio_vsock.h   |   2 +
>  net/vmw_vsock/af_vsock.c|  30 +-
>  net/vmw_vsock/hyperv_transport.c|   2 +-
>  net/vmw_vsock/virtio_transport.c| 237 +---
>  net/vmw_vsock/virtio_transport_common.c | 771 
>  net/vmw_vsock/vmci_transport.c  |   9 +-
>  net/vmw_vsock/vsock_loopback.c  |  51 +-
>  tools/testing/vsock/util.c  | 105 
>  tools/testing/vsock/util.h  |   4 +
>  tools/testing/vsock/vsock_test.c| 195 ++
>  13 files changed, 1176 insertions(+), 543 deletions(-)
> 
> -- 
> 2.35.1
> 

-
To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org



[virtio-dev] Re: [virtio] [PATCH RFC v7 6/8] ccw: disallow ADMIN_VQ

2022-08-16 Thread Michael S. Tsirkin
On Tue, Aug 16, 2022 at 11:48:51AM -0400, Michael S. Tsirkin wrote:
> On Tue, Aug 16, 2022 at 04:48:11PM +0200, Halil Pasic wrote:
> > On Fri, 12 Aug 2022 13:19:20 -0400
> > "Michael S. Tsirkin"  wrote:
> > 
> > > Signed-off-by: Michael S. Tsirkin 
> > > ---
> > >  content.tex | 10 ++
> > >  1 file changed, 10 insertions(+)
> > > 
> > > diff --git a/content.tex b/content.tex
> > > index 76b5a28..53be680 100644
> > > --- a/content.tex
> > > +++ b/content.tex
> > > @@ -2668,6 +2668,16 @@ \subsubsection{Handling Device 
> > > Features}\label{sec:Virtio Transport Options / Vi
> > >  uses the CCW_CMD_WRITE_FEAT command, denoting a 
> > > \field{features}/\field{index}
> > >  combination.
> > >  
> > > +\devicenormative{\paragraph}{Handling Device Features}{Virtio Transport 
> > > Options / Virtio over channel I/O / Device Initialization / Handling 
> > > Device Features}
> > > +
> > > +Device MUST NOT set bit VIRTIO_F_ADMIN_VQ (bit 41) in
> > > +DeviceFeatures.
> > > +
> > > +\drivernormative{\paragraph}{Handling Device Features}{Virtio Transport 
> > > Options / Virtio over channel I/O / Device Initialization / Handling 
> > > Device Features}
> > > +
> > > +Driver MUST NOT set bit VIRTIO_F_ADMIN_VQ (bit 41) in
> > > +DriverFeatures even if offered by the device.
> > > +
> > 
> > I'm not sure I understand the intention here. I believe what we try to
> > accomplish here is the following. The Channel I/O transport *currently*
> > does not support the VIRTIO_F_ADMIN_VQ feature. It is not like we want
> > to state that the feature VIRTIO_F_ADMIN_VQ won't ever be supported by
> > the Channel I/O transport. Or am I wrong?
> >
> > If my assumptions are right, then the old incarnation of the spec could
> > contradict the new incarnation of the spec. Thus I would prefer something
> > like.
> 
> Relaxing requirenents is always okay.
> 
> > 
> > """
> > Currently the following features are not supported by the Channel I/O
> > transport:
> > * VIRTIO_F_ADMIN_VQ
> > """
> 
> what I want to prevent is driver saying "oh device will not set ADMIN_VQ
> so it's ok to acknowledge it if offered, it is never offered even
> though it does not suport it".
> because then it becomes impossible to know when actually a new driver
> appears with actual support.
> 
> So, Maybe just add text
> 
> Note: future versions of this specification will allow setting ADMIN_VQ
> for driver and device. Device MUST NOT assume driver does not
> acknowledge ADMIN_VQ if offered.
> 
> And similarly for drivers:
> 
> Note: future versions of this specification will allow setting ADMIN_VQ
> for driver and device. Drivers MUST NOT assume ADMIN_VQ if not offered.
> 
> > 
> > If we want, we can also state what needs to be done in general when
> > features are unsupported by the transport. And yes, that normative
> > material in my opinion.
> > 
> > Regards,
> > Halil
> 
> 
> Are there other examples? I want to call out the list explicitly because
> it is so easy to enable an extra feature by mistake.
> 

And also, I don't *want* to make it easy to add features only to some
transports. Possible, ok, but not easy.

> > >  \subsubsection{Device Configuration}\label{sec:Virtio Transport Options 
> > > / Virtio over channel I/O / Device Initialization / Device Configuration}
> > >  
> > >  The device's configuration space is located in host memory.


-
To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org



[virtio-dev] Re: [virtio] [PATCH RFC v7 6/8] ccw: disallow ADMIN_VQ

2022-08-16 Thread Michael S. Tsirkin
On Tue, Aug 16, 2022 at 04:48:11PM +0200, Halil Pasic wrote:
> On Fri, 12 Aug 2022 13:19:20 -0400
> "Michael S. Tsirkin"  wrote:
> 
> > Signed-off-by: Michael S. Tsirkin 
> > ---
> >  content.tex | 10 ++
> >  1 file changed, 10 insertions(+)
> > 
> > diff --git a/content.tex b/content.tex
> > index 76b5a28..53be680 100644
> > --- a/content.tex
> > +++ b/content.tex
> > @@ -2668,6 +2668,16 @@ \subsubsection{Handling Device 
> > Features}\label{sec:Virtio Transport Options / Vi
> >  uses the CCW_CMD_WRITE_FEAT command, denoting a 
> > \field{features}/\field{index}
> >  combination.
> >  
> > +\devicenormative{\paragraph}{Handling Device Features}{Virtio Transport 
> > Options / Virtio over channel I/O / Device Initialization / Handling Device 
> > Features}
> > +
> > +Device MUST NOT set bit VIRTIO_F_ADMIN_VQ (bit 41) in
> > +DeviceFeatures.
> > +
> > +\drivernormative{\paragraph}{Handling Device Features}{Virtio Transport 
> > Options / Virtio over channel I/O / Device Initialization / Handling Device 
> > Features}
> > +
> > +Driver MUST NOT set bit VIRTIO_F_ADMIN_VQ (bit 41) in
> > +DriverFeatures even if offered by the device.
> > +
> 
> I'm not sure I understand the intention here. I believe what we try to
> accomplish here is the following. The Channel I/O transport *currently*
> does not support the VIRTIO_F_ADMIN_VQ feature. It is not like we want
> to state that the feature VIRTIO_F_ADMIN_VQ won't ever be supported by
> the Channel I/O transport. Or am I wrong?
>
> If my assumptions are right, then the old incarnation of the spec could
> contradict the new incarnation of the spec. Thus I would prefer something
> like.

Relaxing requirenents is always okay.

> 
> """
> Currently the following features are not supported by the Channel I/O
> transport:
> * VIRTIO_F_ADMIN_VQ
> """

what I want to prevent is driver saying "oh device will not set ADMIN_VQ
so it's ok to acknowledge it if offered, it is never offered even
though it does not suport it".
because then it becomes impossible to know when actually a new driver
appears with actual support.

So, Maybe just add text

Note: future versions of this specification will allow setting ADMIN_VQ
for driver and device. Device MUST NOT assume driver does not
acknowledge ADMIN_VQ if offered.

And similarly for drivers:

Note: future versions of this specification will allow setting ADMIN_VQ
for driver and device. Drivers MUST NOT assume ADMIN_VQ if not offered.

> 
> If we want, we can also state what needs to be done in general when
> features are unsupported by the transport. And yes, that normative
> material in my opinion.
> 
> Regards,
> Halil


Are there other examples? I want to call out the list explicitly because
it is so easy to enable an extra feature by mistake.


> >  \subsubsection{Device Configuration}\label{sec:Virtio Transport Options / 
> > Virtio over channel I/O / Device Initialization / Device Configuration}
> >  
> >  The device's configuration space is located in host memory.


-
To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org



[virtio-dev] Re: [virtio] [PATCH RFC v7 6/8] ccw: disallow ADMIN_VQ

2022-08-16 Thread Halil Pasic
On Fri, 12 Aug 2022 13:19:20 -0400
"Michael S. Tsirkin"  wrote:

> Signed-off-by: Michael S. Tsirkin 
> ---
>  content.tex | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/content.tex b/content.tex
> index 76b5a28..53be680 100644
> --- a/content.tex
> +++ b/content.tex
> @@ -2668,6 +2668,16 @@ \subsubsection{Handling Device 
> Features}\label{sec:Virtio Transport Options / Vi
>  uses the CCW_CMD_WRITE_FEAT command, denoting a 
> \field{features}/\field{index}
>  combination.
>  
> +\devicenormative{\paragraph}{Handling Device Features}{Virtio Transport 
> Options / Virtio over channel I/O / Device Initialization / Handling Device 
> Features}
> +
> +Device MUST NOT set bit VIRTIO_F_ADMIN_VQ (bit 41) in
> +DeviceFeatures.
> +
> +\drivernormative{\paragraph}{Handling Device Features}{Virtio Transport 
> Options / Virtio over channel I/O / Device Initialization / Handling Device 
> Features}
> +
> +Driver MUST NOT set bit VIRTIO_F_ADMIN_VQ (bit 41) in
> +DriverFeatures even if offered by the device.
> +

I'm not sure I understand the intention here. I believe what we try to
accomplish here is the following. The Channel I/O transport *currently*
does not support the VIRTIO_F_ADMIN_VQ feature. It is not like we want
to state that the feature VIRTIO_F_ADMIN_VQ won't ever be supported by
the Channel I/O transport. Or am I wrong?

If my assumptions are right, then the old incarnation of the spec could
contradict the new incarnation of the spec. Thus I would prefer something
like.

"""
Currently the following features are not supported by the Channel I/O
transport:
* VIRTIO_F_ADMIN_VQ
"""


If we want, we can also state what needs to be done in general when
features are unsupported by the transport. And yes, that normative
material in my opinion.

Regards,
Halil

>  \subsubsection{Device Configuration}\label{sec:Virtio Transport Options / 
> Virtio over channel I/O / Device Initialization / Device Configuration}
>  
>  The device's configuration space is located in host memory.


-
To unsubscribe, e-mail: virtio-dev-unsubscr...@lists.oasis-open.org
For additional commands, e-mail: virtio-dev-h...@lists.oasis-open.org



[virtio-dev] [PATCH v7] virtio_net: support split header

2022-08-16 Thread Heng Qi
From: Xuan Zhuo 

The purpose of this feature is to split the header and the payload of
the packet.

|receive buffer|
|   0th descriptor | 1th descriptor|
| virtnet hdr | mac | ip hdr | tcp hdr|<-- hold -->|   payload |

We can use a buffer plus a separate page when allocating the receive
buffer. In this way, we can ensure that all payloads can be
independently in a page, which is very beneficial for the zerocopy
implemented by the upper layer.

Signed-off-by: Xuan Zhuo 
Signed-off-by: Heng Qi 
Reviewed-by: Kangjie Xu 
---
v7:
1. Fix some presentation issues.
2. Use "split transport header". @Jason Wang
3. Clarify some paragraphs. @Cornelia Huck
4. determine the device what to do if it does not perform header split 
on a packet.

v6:
1. Fix some syntax issues. @Cornelia Huck
2. Clarify some paragraphs. @Cornelia Huck
3. Determine the device what to do if it does not perform header split 
on a packet.

v5:
1. Determine when hdr_len is credible in the process of rx
2. Clean up the use of buffers and descriptors
3. Clarify the meaning of used lenght if the first descriptor is 
skipped in the case of merge

v4:
1. fix typo @Cornelia Huck @Jason Wang
2. do not split header for IP fragmentation packet. @Jason Wang

v3:
1. Fix some syntax issues
2. Fix some terminology issues
3. It is not unified with ip alignment, so ip alignment is not included
4. Make it clear that the device must support four types, in the case 
of successful negotiation.

 conformance.tex |   2 ++
 content.tex | 102 
 2 files changed, 104 insertions(+)

diff --git a/conformance.tex b/conformance.tex
index 2b86fc6..4e2b82e 100644
--- a/conformance.tex
+++ b/conformance.tex
@@ -150,6 +150,7 @@ \section{Conformance Targets}\label{sec:Conformance / 
Conformance Targets}
 \item \ref{drivernormative:Device Types / Network Device / Device Operation / 
Control Virtqueue / Offloads State Configuration / Setting Offloads State}
 \item \ref{drivernormative:Device Types / Network Device / Device Operation / 
Control Virtqueue / Receive-side scaling (RSS) }
 \item \ref{drivernormative:Device Types / Network Device / Device Operation / 
Control Virtqueue / Notifications Coalescing}
+\item \ref{drivernormative:Device Types / Network Device / Device Operation / 
Control Virtqueue / Split Transport Header}
 \end{itemize}
 
 \conformance{\subsection}{Block Driver Conformance}\label{sec:Conformance / 
Driver Conformance / Block Driver Conformance}
@@ -415,6 +416,7 @@ \section{Conformance Targets}\label{sec:Conformance / 
Conformance Targets}
 \item \ref{devicenormative:Device Types / Network Device / Device Operation / 
Control Virtqueue / Automatic receive steering in multiqueue mode}
 \item \ref{devicenormative:Device Types / Network Device / Device Operation / 
Control Virtqueue / Receive-side scaling (RSS) / RSS processing}
 \item \ref{devicenormative:Device Types / Network Device / Device Operation / 
Control Virtqueue / Notifications Coalescing}
+\item \ref{devicenormative:Device Types / Network Device / Device Operation / 
Control Virtqueue / Split Transport Header}
 \end{itemize}
 
 \conformance{\subsection}{Block Device Conformance}\label{sec:Conformance / 
Device Conformance / Block Device Conformance}
diff --git a/content.tex b/content.tex
index e863709..5676da9 100644
--- a/content.tex
+++ b/content.tex
@@ -3084,6 +3084,9 @@ \subsection{Feature bits}\label{sec:Device Types / 
Network Device / Feature bits
 \item[VIRTIO_NET_F_CTRL_MAC_ADDR(23)] Set MAC address through control
 channel.
 
+\item[VIRTIO_NET_F_SPLIT_TRANSPORT_HEADER (52)] Device supports splitting
+the transport header and the payload.
+
 \item[VIRTIO_NET_F_NOTF_COAL(53)] Device supports notifications coalescing.
 
 \item[VIRTIO_NET_F_GUEST_USO4 (54)] Driver can receive USOv4 packets.
@@ -3140,6 +3143,7 @@ \subsubsection{Feature bit requirements}\label{sec:Device 
Types / Network Device
 \item[VIRTIO_NET_F_NOTF_COAL] Requires VIRTIO_NET_F_CTRL_VQ.
 \item[VIRTIO_NET_F_RSC_EXT] Requires VIRTIO_NET_F_HOST_TSO4 or 
VIRTIO_NET_F_HOST_TSO6.
 \item[VIRTIO_NET_F_RSS] Requires VIRTIO_NET_F_CTRL_VQ.
+\item[VIRTIO_NET_F_SPLIT_TRANSPORT_HEADER] Requires VIRTIO_NET_F_CTRL_VQ.
 \end{description}
 
 \subsubsection{Legacy Interface: Feature bits}\label{sec:Device Types / 
Network Device / Feature bits / Legacy Interface: Feature bits}
@@ -3371,6 +3375,7 @@ \subsection{Device Operation}\label{sec:Device Types / 
Network Device / Device O
 #define VIRTIO_NET_HDR_F_NEEDS_CSUM1
 #define VIRTIO_NET_HDR_F_DATA_VALID2
 #define VIRTIO_NET_HDR_F_RSC_INFO  4
+#define VIRTIO_NET_HDR_F_SPLIT_TRANSPORT_HEADER  8
 u8 flags;
 #define VIRTIO_NET_HDR_GSO_NONE0
 #define VIRTIO_NET_HDR_GSO_TCPV4