Re: [PATCH net-next v2 3/3] xsk: build skb by page

2021-01-21 Thread Yunsheng Lin
On 2021/1/21 15:41, Magnus Karlsson wrote:
> On Wed, Jan 20, 2021 at 9:29 PM Alexander Lobakin  wrote:
>>
>> From: Xuan Zhuo 
>> Date: Wed, 20 Jan 2021 16:30:56 +0800
>>
>>> This patch constructs the skb based on pages to save memory copy
>>> overhead.
>>>
>>> This function is implemented based on IFF_TX_SKB_NO_LINEAR. Only when
>>> the network card's priv_flags supports IFF_TX_SKB_NO_LINEAR are pages
>>> used to construct the skb directly. If this feature is not supported,
>>> it is still necessary to copy data to construct the skb.
>>>
>>>  Performance Testing 
>>>
>>> The test environment is Aliyun ECS server.
>>> Test cmd:
>>> ```
>>> xdpsock -i eth0 -t  -S -s 
>>> ```
>>>
>>> Test result data:
>>>
>>> size    64      512     1024    1500
>>> copy    1916747 1775988 1600203 1440054
>>> page    1974058 1953655 1945463 1904478
>>> percent 3.0%    10.0%   21.58%  32.3%
>>>
>>> Signed-off-by: Xuan Zhuo 
>>> Reviewed-by: Dust Li 
>>> ---
>>>  net/xdp/xsk.c | 104 
>>> --
>>>  1 file changed, 86 insertions(+), 18 deletions(-)
>>
>> Now I like the result, thanks!
>>
>> But Patchwork still displays your series incorrectly (messages 0 and 1
>> are missing). I'm concerned maintainers may not take it in this
>> form. Try passing the folder's name, not folder/*.patch, to
>> git send-email when sending, and don't use --in-reply-to when sending
>> a new iteration.
> 
> Xuan,
> 
> Please make the new submission of the patch set a v3 even though you
> did not change the code. Just so we can clearly see it is the new
> submission.
> 
>>> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
>>> index 8037b04..40bac11 100644
>>> --- a/net/xdp/xsk.c
>>> +++ b/net/xdp/xsk.c
>>> @@ -430,6 +430,87 @@ static void xsk_destruct_skb(struct sk_buff *skb)
>>>   sock_wfree(skb);
>>>  }
>>>
>>> +static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
>>> +   struct xdp_desc *desc)
>>> +{
>>> + u32 len, offset, copy, copied;
>>> + struct sk_buff *skb;
>>> + struct page *page;
>>> + void *buffer;
>>> + int err, i;
>>> + u64 addr;
>>> +
>>> + skb = sock_alloc_send_skb(&xs->sk, 0, 1, &err);
>>> + if (unlikely(!skb))
>>> + return ERR_PTR(err);
>>> +
>>> + addr = desc->addr;
>>> + len = desc->len;
>>> +
>>> + buffer = xsk_buff_raw_get_data(xs->pool, addr);
>>> + offset = offset_in_page(buffer);
>>> + addr = buffer - xs->pool->addrs;
>>> +
>>> + for (copied = 0, i = 0; copied < len; i++) {
>>> + page = xs->pool->umem->pgs[addr >> PAGE_SHIFT];
>>> +
>>> + get_page(page);
>>> +
>>> + copy = min_t(u32, PAGE_SIZE - offset, len - copied);
>>> +
>>> + skb_fill_page_desc(skb, i, page, offset, copy);
>>> +
>>> + copied += copy;
>>> + addr += copy;
>>> + offset = 0;
>>> + }
>>> +
>>> + skb->len += len;
>>> + skb->data_len += len;
>>> + skb->truesize += len;
>>> +
>>> + refcount_add(len, &xs->sk.sk_wmem_alloc);
>>> +
>>> + return skb;
>>> +}
>>> +
>>> +static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
>>> +  struct xdp_desc *desc)
>>> +{
>>> + struct sk_buff *skb = NULL;

It seems the above init is unnecessary, for the skb is always
set before being used.
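I.e. (purely as an illustration of the suggested change, not a tested
patch):

	struct sk_buff *skb;	/* both branches below assign it before use */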

>>> +
>>> + if (xs->dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
>>> + skb = xsk_build_skb_zerocopy(xs, desc);
>>> + if (IS_ERR(skb))
>>> + return skb;
>>> + } else {
>>> + void *buffer;
>>> + u32 len;
>>> + int err;
>>> +
>>> + len = desc->len;
>>> + skb = sock_alloc_send_skb(&xs->sk, len, 1, &err);
>>> + if (unlikely(!skb))
>>> + return ERR_PTR(err);
>>> +
>>> + skb_put(skb, len);
>>> + buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
>>> + err = skb_store_bits(skb, 0, buffer, len);
>>> + if (unlikely(err)) {
>>> + kfree_skb(skb);
>>> + return ERR_PTR(err);
>>> + }
>>> + }
>>> +
>>> + skb->dev = xs->dev;
>>> + skb->priority = xs->sk.sk_priority;
>>> + skb->mark = xs->sk.sk_mark;
>>> + skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
>>> + skb->destructor = xsk_destruct_skb;
>>> +
>>> + return skb;
>>> +}
>>> +
>>>  static int xsk_generic_xmit(struct sock *sk)
>>>  {
>>>   struct xdp_sock *xs = xdp_sk(sk);
>>> @@ -446,43 +527,30 @@ static int xsk_generic_xmit(struct sock *sk)
>>>   goto out;
>>>
>>>   while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
>>> - char *buffer;
>>> - u64 addr;
>>> - u32 len;
>>> -
>>>   if (max_batch-- == 0) {
>>>   err = -EAGAIN;
>>>   goto out;
>>>   }
>>>

Re: [PATCH net-next v2 3/3] xsk: build skb by page

2021-01-20 Thread Magnus Karlsson
On Wed, Jan 20, 2021 at 9:29 PM Alexander Lobakin  wrote:
>
> From: Xuan Zhuo 
> Date: Wed, 20 Jan 2021 16:30:56 +0800
>
> > This patch constructs the skb based on pages to save memory copy
> > overhead.
> >
> > This function is implemented based on IFF_TX_SKB_NO_LINEAR. Only when
> > the network card's priv_flags supports IFF_TX_SKB_NO_LINEAR are pages
> > used to construct the skb directly. If this feature is not supported,
> > it is still necessary to copy data to construct the skb.
> >
> >  Performance Testing 
> >
> > The test environment is Aliyun ECS server.
> > Test cmd:
> > ```
> > xdpsock -i eth0 -t  -S -s 
> > ```
> >
> > Test result data:
> >
> > size    64      512     1024    1500
> > copy    1916747 1775988 1600203 1440054
> > page    1974058 1953655 1945463 1904478
> > percent 3.0%    10.0%   21.58%  32.3%
> >
> > Signed-off-by: Xuan Zhuo 
> > Reviewed-by: Dust Li 
> > ---
> >  net/xdp/xsk.c | 104 
> > --
> >  1 file changed, 86 insertions(+), 18 deletions(-)
>
> Now I like the result, thanks!
>
> But Patchwork still displays your series incorrectly (messages 0 and 1
> are missing). I'm concerned maintainers may not take it in this
> form. Try passing the folder's name, not folder/*.patch, to
> git send-email when sending, and don't use --in-reply-to when sending
> a new iteration.

Xuan,

Please make the new submission of the patch set a v3 even though you
did not change the code. Just so we can clearly see it is the new
submission.

> > diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> > index 8037b04..40bac11 100644
> > --- a/net/xdp/xsk.c
> > +++ b/net/xdp/xsk.c
> > @@ -430,6 +430,87 @@ static void xsk_destruct_skb(struct sk_buff *skb)
> >   sock_wfree(skb);
> >  }
> >
> > +static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
> > +   struct xdp_desc *desc)
> > +{
> > + u32 len, offset, copy, copied;
> > + struct sk_buff *skb;
> > + struct page *page;
> > + void *buffer;
> > + int err, i;
> > + u64 addr;
> > +
> > + skb = sock_alloc_send_skb(&xs->sk, 0, 1, &err);
> > + if (unlikely(!skb))
> > + return ERR_PTR(err);
> > +
> > + addr = desc->addr;
> > + len = desc->len;
> > +
> > + buffer = xsk_buff_raw_get_data(xs->pool, addr);
> > + offset = offset_in_page(buffer);
> > + addr = buffer - xs->pool->addrs;
> > +
> > + for (copied = 0, i = 0; copied < len; i++) {
> > + page = xs->pool->umem->pgs[addr >> PAGE_SHIFT];
> > +
> > + get_page(page);
> > +
> > + copy = min_t(u32, PAGE_SIZE - offset, len - copied);
> > +
> > + skb_fill_page_desc(skb, i, page, offset, copy);
> > +
> > + copied += copy;
> > + addr += copy;
> > + offset = 0;
> > + }
> > +
> > + skb->len += len;
> > + skb->data_len += len;
> > + skb->truesize += len;
> > +
> > + refcount_add(len, &xs->sk.sk_wmem_alloc);
> > +
> > + return skb;
> > +}
> > +
> > +static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
> > +  struct xdp_desc *desc)
> > +{
> > + struct sk_buff *skb = NULL;
> > +
> > + if (xs->dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
> > + skb = xsk_build_skb_zerocopy(xs, desc);
> > + if (IS_ERR(skb))
> > + return skb;
> > + } else {
> > + void *buffer;
> > + u32 len;
> > + int err;
> > +
> > + len = desc->len;
> > + skb = sock_alloc_send_skb(&xs->sk, len, 1, &err);
> > + if (unlikely(!skb))
> > + return ERR_PTR(err);
> > +
> > + skb_put(skb, len);
> > + buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
> > + err = skb_store_bits(skb, 0, buffer, len);
> > + if (unlikely(err)) {
> > + kfree_skb(skb);
> > + return ERR_PTR(err);
> > + }
> > + }
> > +
> > + skb->dev = xs->dev;
> > + skb->priority = xs->sk.sk_priority;
> > + skb->mark = xs->sk.sk_mark;
> > + skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
> > + skb->destructor = xsk_destruct_skb;
> > +
> > + return skb;
> > +}
> > +
> >  static int xsk_generic_xmit(struct sock *sk)
> >  {
> >   struct xdp_sock *xs = xdp_sk(sk);
> > @@ -446,43 +527,30 @@ static int xsk_generic_xmit(struct sock *sk)
> >   goto out;
> >
> >   while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
> > - char *buffer;
> > - u64 addr;
> > - u32 len;
> > -
> >   if (max_batch-- == 0) {
> >   err = -EAGAIN;
> >   goto out;
> >   }
> >
> > - len = desc.len;
> > - skb = sock_alloc_send_skb(sk, len, 1, &err);
> > - if (unlikely(!skb))
> > + skb = 

Re: [PATCH net-next v2 3/3] xsk: build skb by page

2021-01-20 Thread Alexander Lobakin
From: Xuan Zhuo 
Date: Wed, 20 Jan 2021 16:30:56 +0800

> This patch constructs the skb based on pages to save memory copy
> overhead.
> 
> This function is implemented based on IFF_TX_SKB_NO_LINEAR. Only when
> the network card's priv_flags supports IFF_TX_SKB_NO_LINEAR are pages
> used to construct the skb directly. If this feature is not supported,
> it is still necessary to copy data to construct the skb.
> 
>  Performance Testing 
> 
> The test environment is Aliyun ECS server.
> Test cmd:
> ```
> xdpsock -i eth0 -t  -S -s 
> ```
> 
> Test result data:
> 
> size    64      512     1024    1500
> copy    1916747 1775988 1600203 1440054
> page    1974058 1953655 1945463 1904478
> percent 3.0%    10.0%   21.58%  32.3%
> 
> Signed-off-by: Xuan Zhuo 
> Reviewed-by: Dust Li 
> ---
>  net/xdp/xsk.c | 104 
> --
>  1 file changed, 86 insertions(+), 18 deletions(-)

Now I like the result, thanks!

But Patchwork still displays your series incorrectly (messages 0 and 1
are missing). I'm concerned maintainers may not take it in this
form. Try passing the folder's name, not folder/*.patch, to
git send-email when sending, and don't use --in-reply-to when sending
a new iteration.
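For example, assuming the series sits in a local directory (the name
outgoing/ below is only illustrative):

	git format-patch --cover-letter -o outgoing/ -3
	git send-email outgoing/

instead of passing outgoing/*.patch together with --in-reply-to.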

> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index 8037b04..40bac11 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -430,6 +430,87 @@ static void xsk_destruct_skb(struct sk_buff *skb)
>   sock_wfree(skb);
>  }
> 
> +static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
> +   struct xdp_desc *desc)
> +{
> + u32 len, offset, copy, copied;
> + struct sk_buff *skb;
> + struct page *page;
> + void *buffer;
> + int err, i;
> + u64 addr;
> +
> + skb = sock_alloc_send_skb(&xs->sk, 0, 1, &err);
> + if (unlikely(!skb))
> + return ERR_PTR(err);
> +
> + addr = desc->addr;
> + len = desc->len;
> +
> + buffer = xsk_buff_raw_get_data(xs->pool, addr);
> + offset = offset_in_page(buffer);
> + addr = buffer - xs->pool->addrs;
> +
> + for (copied = 0, i = 0; copied < len; i++) {
> + page = xs->pool->umem->pgs[addr >> PAGE_SHIFT];
> +
> + get_page(page);
> +
> + copy = min_t(u32, PAGE_SIZE - offset, len - copied);
> +
> + skb_fill_page_desc(skb, i, page, offset, copy);
> +
> + copied += copy;
> + addr += copy;
> + offset = 0;
> + }
> +
> + skb->len += len;
> + skb->data_len += len;
> + skb->truesize += len;
> +
> + refcount_add(len, &xs->sk.sk_wmem_alloc);
> +
> + return skb;
> +}
> +
> +static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
> +  struct xdp_desc *desc)
> +{
> + struct sk_buff *skb = NULL;
> +
> + if (xs->dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
> + skb = xsk_build_skb_zerocopy(xs, desc);
> + if (IS_ERR(skb))
> + return skb;
> + } else {
> + void *buffer;
> + u32 len;
> + int err;
> +
> + len = desc->len;
> + skb = sock_alloc_send_skb(&xs->sk, len, 1, &err);
> + if (unlikely(!skb))
> + return ERR_PTR(err);
> +
> + skb_put(skb, len);
> + buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
> + err = skb_store_bits(skb, 0, buffer, len);
> + if (unlikely(err)) {
> + kfree_skb(skb);
> + return ERR_PTR(err);
> + }
> + }
> +
> + skb->dev = xs->dev;
> + skb->priority = xs->sk.sk_priority;
> + skb->mark = xs->sk.sk_mark;
> + skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
> + skb->destructor = xsk_destruct_skb;
> +
> + return skb;
> +}
> +
>  static int xsk_generic_xmit(struct sock *sk)
>  {
>   struct xdp_sock *xs = xdp_sk(sk);
> @@ -446,43 +527,30 @@ static int xsk_generic_xmit(struct sock *sk)
>   goto out;
> 
>   while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
> - char *buffer;
> - u64 addr;
> - u32 len;
> -
>   if (max_batch-- == 0) {
>   err = -EAGAIN;
>   goto out;
>   }
> 
> - len = desc.len;
> - skb = sock_alloc_send_skb(sk, len, 1, &err);
> - if (unlikely(!skb))
> + skb = xsk_build_skb(xs, &desc);
> + if (IS_ERR(skb)) {
> + err = PTR_ERR(skb);
>   goto out;
> + }
> 
> - skb_put(skb, len);
> - addr = desc.addr;
> - buffer = xsk_buff_raw_get_data(xs->pool, addr);
> - err = skb_store_bits(skb, 0, buffer, len);
>   /* This is the backpressure mechanism for the Tx path.
>* Reserve space in the completion queue and only proceed
>  

[PATCH net-next v2 3/3] xsk: build skb by page

2021-01-20 Thread Xuan Zhuo
This patch constructs the skb based on pages to save memory copy
overhead.

This function is implemented based on IFF_TX_SKB_NO_LINEAR. Only when
the network card's priv_flags supports IFF_TX_SKB_NO_LINEAR are pages
used to construct the skb directly. If this feature is not supported,
it is still necessary to copy data to construct the skb.

 Performance Testing 

The test environment is Aliyun ECS server.
Test cmd:
```
xdpsock -i eth0 -t  -S -s 
```

Test result data:

size    64      512     1024    1500
copy    1916747 1775988 1600203 1440054
page    1974058 1953655 1945463 1904478
percent 3.0%    10.0%   21.58%  32.3%

Signed-off-by: Xuan Zhuo 
Reviewed-by: Dust Li 
---
 net/xdp/xsk.c | 104 --
 1 file changed, 86 insertions(+), 18 deletions(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 8037b04..40bac11 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -430,6 +430,87 @@ static void xsk_destruct_skb(struct sk_buff *skb)
sock_wfree(skb);
 }
 
+static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
+ struct xdp_desc *desc)
+{
+   u32 len, offset, copy, copied;
+   struct sk_buff *skb;
+   struct page *page;
+   void *buffer;
+   int err, i;
+   u64 addr;
+
+   skb = sock_alloc_send_skb(&xs->sk, 0, 1, &err);
+   if (unlikely(!skb))
+   return ERR_PTR(err);
+
+   addr = desc->addr;
+   len = desc->len;
+
+   buffer = xsk_buff_raw_get_data(xs->pool, addr);
+   offset = offset_in_page(buffer);
+   addr = buffer - xs->pool->addrs;
+
+   for (copied = 0, i = 0; copied < len; i++) {
+   page = xs->pool->umem->pgs[addr >> PAGE_SHIFT];
+
+   get_page(page);
+
+   copy = min_t(u32, PAGE_SIZE - offset, len - copied);
+
+   skb_fill_page_desc(skb, i, page, offset, copy);
+
+   copied += copy;
+   addr += copy;
+   offset = 0;
+   }
+
+   skb->len += len;
+   skb->data_len += len;
+   skb->truesize += len;
+
+   refcount_add(len, &xs->sk.sk_wmem_alloc);
+
+   return skb;
+}
+
+static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
+struct xdp_desc *desc)
+{
+   struct sk_buff *skb = NULL;
+
+   if (xs->dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
+   skb = xsk_build_skb_zerocopy(xs, desc);
+   if (IS_ERR(skb))
+   return skb;
+   } else {
+   void *buffer;
+   u32 len;
+   int err;
+
+   len = desc->len;
+   skb = sock_alloc_send_skb(&xs->sk, len, 1, &err);
+   if (unlikely(!skb))
+   return ERR_PTR(err);
+
+   skb_put(skb, len);
+   buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
+   err = skb_store_bits(skb, 0, buffer, len);
+   if (unlikely(err)) {
+   kfree_skb(skb);
+   return ERR_PTR(err);
+   }
+   }
+
+   skb->dev = xs->dev;
+   skb->priority = xs->sk.sk_priority;
+   skb->mark = xs->sk.sk_mark;
+   skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
+   skb->destructor = xsk_destruct_skb;
+
+   return skb;
+}
+
 static int xsk_generic_xmit(struct sock *sk)
 {
struct xdp_sock *xs = xdp_sk(sk);
@@ -446,43 +527,30 @@ static int xsk_generic_xmit(struct sock *sk)
goto out;
 
while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
-   char *buffer;
-   u64 addr;
-   u32 len;
-
if (max_batch-- == 0) {
err = -EAGAIN;
goto out;
}
 
-   len = desc.len;
-   skb = sock_alloc_send_skb(sk, len, 1, &err);
-   if (unlikely(!skb))
+   skb = xsk_build_skb(xs, &desc);
+   if (IS_ERR(skb)) {
+   err = PTR_ERR(skb);
goto out;
+   }
 
-   skb_put(skb, len);
-   addr = desc.addr;
-   buffer = xsk_buff_raw_get_data(xs->pool, addr);
-   err = skb_store_bits(skb, 0, buffer, len);
/* This is the backpressure mechanism for the Tx path.
 * Reserve space in the completion queue and only proceed
 * if there is space in it. This avoids having to implement
 * any buffering in the Tx path.
 */
spin_lock_irqsave(&xs->pool->cq_lock, flags);
-   if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
+   if (xskq_prod_reserve(xs->pool->cq)) {
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
kfree_skb(skb);
goto out;
}

Re: [PATCH net-next v2 3/3] xsk: build skb by page

2021-01-20 Thread Michael S. Tsirkin
On Wed, Jan 20, 2021 at 03:11:04AM -0500, Michael S. Tsirkin wrote:
> On Wed, Jan 20, 2021 at 03:50:01PM +0800, Xuan Zhuo wrote:
> > This patch constructs the skb based on pages to save memory copy
> > overhead.
> >
> > This function is implemented based on IFF_TX_SKB_NO_LINEAR. Only when
> > the network card's priv_flags supports IFF_TX_SKB_NO_LINEAR are pages
> > used to construct the skb directly. If this feature is not supported,
> > it is still necessary to copy data to construct the skb.
> > 
> >  Performance Testing 
> > 
> > The test environment is Aliyun ECS server.
> > Test cmd:
> > ```
> > xdpsock -i eth0 -t  -S -s 
> > ```
> > 
> > Test result data:
> > 
> > size    64      512     1024    1500
> > copy    1916747 1775988 1600203 1440054
> > page    1974058 1953655 1945463 1904478
> > percent 3.0%    10.0%   21.58%  32.3%
> > 
> > Signed-off-by: Xuan Zhuo 
> > Reviewed-by: Dust Li 
> 
> I can't see the cover letter or 1/3 in this series - was probably
> threaded incorrectly?


Hmm looked again and now I do see them. My mistake pls ignore.

> 
> > ---
> >  net/xdp/xsk.c | 104 
> > --
> >  1 file changed, 86 insertions(+), 18 deletions(-)
> > 
> > diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> > index 8037b04..817a3a5 100644
> > --- a/net/xdp/xsk.c
> > +++ b/net/xdp/xsk.c
> > @@ -430,6 +430,87 @@ static void xsk_destruct_skb(struct sk_buff *skb)
> > sock_wfree(skb);
> >  }
> >  
> > +static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
> > + struct xdp_desc *desc)
> > +{
> > +   u32 len, offset, copy, copied;
> > +   struct sk_buff *skb;
> > +   struct page *page;
> > +   char *buffer;
> 
> Actually, make this void *, this way you will not need
> casts down the road. I know this is from xsk_generic_xmit -
> I don't know why it's char * there, either.
> 
> > +   int err, i;
> > +   u64 addr;
> > +
> > +   skb = sock_alloc_send_skb(&xs->sk, 0, 1, &err);
> > +   if (unlikely(!skb))
> > +   return ERR_PTR(err);
> > +
> > +   addr = desc->addr;
> > +   len = desc->len;
> > +
> > +   buffer = xsk_buff_raw_get_data(xs->pool, addr);
> > +   offset = offset_in_page(buffer);
> > +   addr = buffer - (char *)xs->pool->addrs;
> > +
> > +   for (copied = 0, i = 0; copied < len; i++) {
> > +   page = xs->pool->umem->pgs[addr >> PAGE_SHIFT];
> > +
> > +   get_page(page);
> > +
> > +   copy = min_t(u32, PAGE_SIZE - offset, len - copied);
> > +
> > +   skb_fill_page_desc(skb, i, page, offset, copy);
> > +
> > +   copied += copy;
> > +   addr += copy;
> > +   offset = 0;
> > +   }
> > +
> > +   skb->len += len;
> > +   skb->data_len += len;
> > +   skb->truesize += len;
> > +
> > +   refcount_add(len, &xs->sk.sk_wmem_alloc);
> > +
> > +   return skb;
> > +}
> > +
> > +static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
> > +struct xdp_desc *desc)
> > +{
> > +   struct sk_buff *skb = NULL;
> > +
> > +   if (xs->dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
> > +   skb = xsk_build_skb_zerocopy(xs, desc);
> > +   if (IS_ERR(skb))
> > +   return skb;
> > +   } else {
> > +   char *buffer;
> > +   u32 len;
> > +   int err;
> > +
> > +   len = desc->len;
> > +   skb = sock_alloc_send_skb(&xs->sk, len, 1, &err);
> > +   if (unlikely(!skb))
> > +   return ERR_PTR(err);
> > +
> > +   skb_put(skb, len);
> > +   buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
> > +   err = skb_store_bits(skb, 0, buffer, len);
> > +   if (unlikely(err)) {
> > +   kfree_skb(skb);
> > +   return ERR_PTR(err);
> > +   }
> > +   }
> > +
> > +   skb->dev = xs->dev;
> > +   skb->priority = xs->sk.sk_priority;
> > +   skb->mark = xs->sk.sk_mark;
> > +   skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
> > +   skb->destructor = xsk_destruct_skb;
> > +
> > +   return skb;
> > +}
> > +
> >  static int xsk_generic_xmit(struct sock *sk)
> >  {
> > struct xdp_sock *xs = xdp_sk(sk);
> > @@ -446,43 +527,30 @@ static int xsk_generic_xmit(struct sock *sk)
> > goto out;
> >  
> > while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
> > -   char *buffer;
> > -   u64 addr;
> > -   u32 len;
> > -
> > if (max_batch-- == 0) {
> > err = -EAGAIN;
> > goto out;
> > }
> >  
> > -   len = desc.len;
> > -   skb = sock_alloc_send_skb(sk, len, 1, &err);
> > -   if (unlikely(!skb))
> > +   skb = xsk_build_skb(xs, &desc);
> > +   if (IS_ERR(skb)) {
> > +   err = PTR_ERR(skb);
> > goto out;
> > +   }
> >  
> > -   skb_put(skb, len);
> > -   addr = desc.addr;
> > -   buffer = 

Re: [PATCH net-next v2 3/3] xsk: build skb by page

2021-01-20 Thread Michael S. Tsirkin
On Wed, Jan 20, 2021 at 03:50:01PM +0800, Xuan Zhuo wrote:
> This patch constructs the skb based on pages to save memory copy
> overhead.
> 
> This function is implemented based on IFF_TX_SKB_NO_LINEAR. Only when
> the network card's priv_flags supports IFF_TX_SKB_NO_LINEAR are pages
> used to construct the skb directly. If this feature is not supported,
> it is still necessary to copy data to construct the skb.
> 
>  Performance Testing 
> 
> The test environment is Aliyun ECS server.
> Test cmd:
> ```
> xdpsock -i eth0 -t  -S -s 
> ```
> 
> Test result data:
> 
> size    64      512     1024    1500
> copy    1916747 1775988 1600203 1440054
> page    1974058 1953655 1945463 1904478
> percent 3.0%    10.0%   21.58%  32.3%
> 
> Signed-off-by: Xuan Zhuo 
> Reviewed-by: Dust Li 

I can't see the cover letter or 1/3 in this series - was probably
threaded incorrectly?


> ---
>  net/xdp/xsk.c | 104 
> --
>  1 file changed, 86 insertions(+), 18 deletions(-)
> 
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index 8037b04..817a3a5 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -430,6 +430,87 @@ static void xsk_destruct_skb(struct sk_buff *skb)
>   sock_wfree(skb);
>  }
>  
> +static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
> +   struct xdp_desc *desc)
> +{
> + u32 len, offset, copy, copied;
> + struct sk_buff *skb;
> + struct page *page;
> + char *buffer;

Actually, make this void *, this way you will not need
casts down the road. I know this is from xsk_generic_xmit -
I don't know why it's char * there, either.
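Something along these lines (an untested sketch of what I mean; same
logic, only the pointer type changes):

	void *buffer;
	...
	buffer = xsk_buff_raw_get_data(xs->pool, addr);
	offset = offset_in_page(buffer);
	addr = buffer - xs->pool->addrs;	/* no (char *) cast needed */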

> + int err, i;
> + u64 addr;
> +
> + skb = sock_alloc_send_skb(&xs->sk, 0, 1, &err);
> + if (unlikely(!skb))
> + return ERR_PTR(err);
> +
> + addr = desc->addr;
> + len = desc->len;
> +
> + buffer = xsk_buff_raw_get_data(xs->pool, addr);
> + offset = offset_in_page(buffer);
> + addr = buffer - (char *)xs->pool->addrs;
> +
> + for (copied = 0, i = 0; copied < len; i++) {
> + page = xs->pool->umem->pgs[addr >> PAGE_SHIFT];
> +
> + get_page(page);
> +
> + copy = min_t(u32, PAGE_SIZE - offset, len - copied);
> +
> + skb_fill_page_desc(skb, i, page, offset, copy);
> +
> + copied += copy;
> + addr += copy;
> + offset = 0;
> + }
> +
> + skb->len += len;
> + skb->data_len += len;
> + skb->truesize += len;
> +
> + refcount_add(len, &xs->sk.sk_wmem_alloc);
> +
> + return skb;
> +}
> +
> +static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
> +  struct xdp_desc *desc)
> +{
> + struct sk_buff *skb = NULL;
> +
> + if (xs->dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
> + skb = xsk_build_skb_zerocopy(xs, desc);
> + if (IS_ERR(skb))
> + return skb;
> + } else {
> + char *buffer;
> + u32 len;
> + int err;
> +
> + len = desc->len;
> + skb = sock_alloc_send_skb(&xs->sk, len, 1, &err);
> + if (unlikely(!skb))
> + return ERR_PTR(err);
> +
> + skb_put(skb, len);
> + buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
> + err = skb_store_bits(skb, 0, buffer, len);
> + if (unlikely(err)) {
> + kfree_skb(skb);
> + return ERR_PTR(err);
> + }
> + }
> +
> + skb->dev = xs->dev;
> + skb->priority = xs->sk.sk_priority;
> + skb->mark = xs->sk.sk_mark;
> + skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
> + skb->destructor = xsk_destruct_skb;
> +
> + return skb;
> +}
> +
>  static int xsk_generic_xmit(struct sock *sk)
>  {
>   struct xdp_sock *xs = xdp_sk(sk);
> @@ -446,43 +527,30 @@ static int xsk_generic_xmit(struct sock *sk)
>   goto out;
>  
>   while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
> - char *buffer;
> - u64 addr;
> - u32 len;
> -
>   if (max_batch-- == 0) {
>   err = -EAGAIN;
>   goto out;
>   }
>  
> - len = desc.len;
> - skb = sock_alloc_send_skb(sk, len, 1, &err);
> - if (unlikely(!skb))
> + skb = xsk_build_skb(xs, &desc);
> + if (IS_ERR(skb)) {
> + err = PTR_ERR(skb);
>   goto out;
> + }
>  
> - skb_put(skb, len);
> - addr = desc.addr;
> - buffer = xsk_buff_raw_get_data(xs->pool, addr);
> - err = skb_store_bits(skb, 0, buffer, len);
>   /* This is the backpressure mechanism for the Tx path.
>* Reserve space in the completion queue and only proceed
>* if there is space in it. This avoids 

[PATCH net-next v2 3/3] xsk: build skb by page

2021-01-19 Thread Xuan Zhuo
This patch constructs the skb based on pages to save memory copy
overhead.

This function is implemented based on IFF_TX_SKB_NO_LINEAR. Only when
the network card's priv_flags supports IFF_TX_SKB_NO_LINEAR are pages
used to construct the skb directly. If this feature is not supported,
it is still necessary to copy data to construct the skb.

 Performance Testing 

The test environment is Aliyun ECS server.
Test cmd:
```
xdpsock -i eth0 -t  -S -s 
```

Test result data:

size    64      512     1024    1500
copy    1916747 1775988 1600203 1440054
page    1974058 1953655 1945463 1904478
percent 3.0%    10.0%   21.58%  32.3%

Signed-off-by: Xuan Zhuo 
Reviewed-by: Dust Li 
---
 net/xdp/xsk.c | 104 --
 1 file changed, 86 insertions(+), 18 deletions(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 8037b04..817a3a5 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -430,6 +430,87 @@ static void xsk_destruct_skb(struct sk_buff *skb)
sock_wfree(skb);
 }
 
+static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
+ struct xdp_desc *desc)
+{
+   u32 len, offset, copy, copied;
+   struct sk_buff *skb;
+   struct page *page;
+   char *buffer;
+   int err, i;
+   u64 addr;
+
+   skb = sock_alloc_send_skb(&xs->sk, 0, 1, &err);
+   if (unlikely(!skb))
+   return ERR_PTR(err);
+
+   addr = desc->addr;
+   len = desc->len;
+
+   buffer = xsk_buff_raw_get_data(xs->pool, addr);
+   offset = offset_in_page(buffer);
+   addr = buffer - (char *)xs->pool->addrs;
+
+   for (copied = 0, i = 0; copied < len; i++) {
+   page = xs->pool->umem->pgs[addr >> PAGE_SHIFT];
+
+   get_page(page);
+
+   copy = min_t(u32, PAGE_SIZE - offset, len - copied);
+
+   skb_fill_page_desc(skb, i, page, offset, copy);
+
+   copied += copy;
+   addr += copy;
+   offset = 0;
+   }
+
+   skb->len += len;
+   skb->data_len += len;
+   skb->truesize += len;
+
+   refcount_add(len, &xs->sk.sk_wmem_alloc);
+
+   return skb;
+}
+
+static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
+struct xdp_desc *desc)
+{
+   struct sk_buff *skb = NULL;
+
+   if (xs->dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
+   skb = xsk_build_skb_zerocopy(xs, desc);
+   if (IS_ERR(skb))
+   return skb;
+   } else {
+   char *buffer;
+   u32 len;
+   int err;
+
+   len = desc->len;
+   skb = sock_alloc_send_skb(&xs->sk, len, 1, &err);
+   if (unlikely(!skb))
+   return ERR_PTR(err);
+
+   skb_put(skb, len);
+   buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
+   err = skb_store_bits(skb, 0, buffer, len);
+   if (unlikely(err)) {
+   kfree_skb(skb);
+   return ERR_PTR(err);
+   }
+   }
+
+   skb->dev = xs->dev;
+   skb->priority = xs->sk.sk_priority;
+   skb->mark = xs->sk.sk_mark;
+   skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
+   skb->destructor = xsk_destruct_skb;
+
+   return skb;
+}
+
 static int xsk_generic_xmit(struct sock *sk)
 {
struct xdp_sock *xs = xdp_sk(sk);
@@ -446,43 +527,30 @@ static int xsk_generic_xmit(struct sock *sk)
goto out;
 
while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
-   char *buffer;
-   u64 addr;
-   u32 len;
-
if (max_batch-- == 0) {
err = -EAGAIN;
goto out;
}
 
-   len = desc.len;
-   skb = sock_alloc_send_skb(sk, len, 1, &err);
-   if (unlikely(!skb))
+   skb = xsk_build_skb(xs, &desc);
+   if (IS_ERR(skb)) {
+   err = PTR_ERR(skb);
goto out;
+   }
 
-   skb_put(skb, len);
-   addr = desc.addr;
-   buffer = xsk_buff_raw_get_data(xs->pool, addr);
-   err = skb_store_bits(skb, 0, buffer, len);
/* This is the backpressure mechanism for the Tx path.
 * Reserve space in the completion queue and only proceed
 * if there is space in it. This avoids having to implement
 * any buffering in the Tx path.
 */
spin_lock_irqsave(&xs->pool->cq_lock, flags);
-   if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
+   if (xskq_prod_reserve(xs->pool->cq)) {
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
kfree_skb(skb);
goto out;
}