On Wed, Nov 09, 2016 at 03:38:31PM +0800, Jason Wang wrote: > Backlog were used for tuntap rx, but it can only process 1 packet at > one time since it was scheduled during sendmsg() synchronously in > process context. This lead bad cache utilization so this patch tries > to do some batching before call rx NAPI. This is done through: > > - accept MSG_MORE as a hint from sendmsg() caller, if it was set, > batch the packet temporarily in a linked list and submit them all > once MSG_MORE were cleared. > - implement a tuntap specific NAPI handler for processing this kind of > possible batching. (This could be done by extending backlog to > support skb like, but using a tun specific one looks cleaner and > easier for future extension). > > Signed-off-by: Jason Wang <jasow...@redhat.com>
So why do we need an extra queue? This is not what hardware devices do. How about adding the packet to queue unconditionally, deferring signalling until we get sendmsg without MSG_MORE? > --- > drivers/net/tun.c | 71 > ++++++++++++++++++++++++++++++++++++++++++++++++++----- > 1 file changed, 65 insertions(+), 6 deletions(-) > > diff --git a/drivers/net/tun.c b/drivers/net/tun.c > index 1588469..d40583b 100644 > --- a/drivers/net/tun.c > +++ b/drivers/net/tun.c > @@ -74,6 +74,7 @@ > #include <linux/skb_array.h> > > #include <asm/uaccess.h> > +#include <linux/interrupt.h> > > /* Uncomment to enable debugging */ > /* #define TUN_DEBUG 1 */ > @@ -169,6 +170,8 @@ struct tun_file { > struct list_head next; > struct tun_struct *detached; > struct skb_array tx_array; > + struct napi_struct napi; > + struct sk_buff_head process_queue; > }; > > struct tun_flow_entry { > @@ -522,6 +525,8 @@ static void tun_queue_purge(struct tun_file *tfile) > while ((skb = skb_array_consume(&tfile->tx_array)) != NULL) > kfree_skb(skb); > > + skb_queue_purge(&tfile->sk.sk_write_queue); > + skb_queue_purge(&tfile->process_queue); > skb_queue_purge(&tfile->sk.sk_error_queue); > } > > @@ -532,6 +537,11 @@ static void __tun_detach(struct tun_file *tfile, bool > clean) > > tun = rtnl_dereference(tfile->tun); > > + if (tun && clean) { > + napi_disable(&tfile->napi); > + netif_napi_del(&tfile->napi); > + } > + > if (tun && !tfile->detached) { > u16 index = tfile->queue_index; > BUG_ON(index >= tun->numqueues); > @@ -587,6 +597,7 @@ static void tun_detach_all(struct net_device *dev) > > for (i = 0; i < n; i++) { > tfile = rtnl_dereference(tun->tfiles[i]); > + napi_disable(&tfile->napi); > BUG_ON(!tfile); > tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; > tfile->socket.sk->sk_data_ready(tfile->socket.sk); > @@ -603,6 +614,7 @@ static void tun_detach_all(struct net_device *dev) > synchronize_net(); > for (i = 0; i < n; i++) { > tfile = rtnl_dereference(tun->tfiles[i]); > + netif_napi_del(&tfile->napi); > /* Drop read queue */ > tun_queue_purge(tfile); > sock_put(&tfile->sk); > @@ -618,6 +630,41 @@ static void tun_detach_all(struct net_device *dev) > module_put(THIS_MODULE); > } > > +static int tun_poll(struct napi_struct *napi, int budget) > +{ > + struct tun_file *tfile = container_of(napi, struct tun_file, napi); > + struct sk_buff_head *input_queue = > + &tfile->socket.sk->sk_write_queue; > + struct sk_buff *skb; > + unsigned int received = 0; > + > + while (1) { > + while ((skb = __skb_dequeue(&tfile->process_queue))) { > + netif_receive_skb(skb); > + if (++received >= budget) > + return received; > + } > + > + spin_lock(&input_queue->lock); > + if (skb_queue_empty(input_queue)) { > + spin_unlock(&input_queue->lock); > + break; > + } > + skb_queue_splice_tail_init(input_queue, &tfile->process_queue); > + spin_unlock(&input_queue->lock); > + } > + > + if (received < budget) { > + napi_complete(napi); > + if (skb_peek(&tfile->socket.sk->sk_write_queue) && > + unlikely(napi_schedule_prep(napi))) { > + __napi_schedule(napi); > + } > + } > + > + return received; > +} > + > static int tun_attach(struct tun_struct *tun, struct file *file, bool > skip_filter) > { > struct tun_file *tfile = file->private_data; > @@ -666,9 +713,11 @@ static int tun_attach(struct tun_struct *tun, struct > file *file, bool skip_filte > > if (tfile->detached) > tun_enable_queue(tfile); > - else > + else { > sock_hold(&tfile->sk); > - > + netif_napi_add(tun->dev, &tfile->napi, tun_poll, 64); > + napi_enable(&tfile->napi); > + } > tun_set_real_num_queues(tun); > > /* device is allowed to go away first, so no need to hold extra > @@ -1150,7 +1199,7 @@ static struct sk_buff *tun_alloc_skb(struct tun_file > *tfile, > /* Get packet from user space buffer */ > static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, > void *msg_control, struct iov_iter *from, > - int noblock) > + int noblock, bool more) > { > struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; > struct sk_buff *skb; > @@ -1296,7 +1345,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, > struct tun_file *tfile, > skb_probe_transport_header(skb, 0); > > rxhash = skb_get_hash(skb); > - netif_rx_ni(skb); > + skb_queue_tail(&tfile->socket.sk->sk_write_queue, skb); > + > + if (!more) { > + local_bh_disable(); > + napi_schedule(&tfile->napi); > + local_bh_enable(); Why do we need to disable bh here? I thought napi_schedule can be called from any context. > + } > > stats = get_cpu_ptr(tun->pcpu_stats); > u64_stats_update_begin(&stats->syncp); > @@ -1319,7 +1374,8 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, > struct iov_iter *from) > if (!tun) > return -EBADFD; > > - result = tun_get_user(tun, tfile, NULL, from, file->f_flags & > O_NONBLOCK); > + result = tun_get_user(tun, tfile, NULL, from, > + file->f_flags & O_NONBLOCK, false); > > tun_put(tun); > return result; > @@ -1579,7 +1635,8 @@ static int tun_sendmsg(struct socket *sock, struct > msghdr *m, size_t total_len) > return -EBADFD; > > ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter, > - m->msg_flags & MSG_DONTWAIT); > + m->msg_flags & MSG_DONTWAIT, > + m->msg_flags & MSG_MORE); > tun_put(tun); > return ret; > } > @@ -2336,6 +2393,8 @@ static int tun_chr_open(struct inode *inode, struct > file * file) > file->private_data = tfile; > INIT_LIST_HEAD(&tfile->next); > > + skb_queue_head_init(&tfile->process_queue); > + > sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); > > return 0; > -- > 2.7.4