Add a TUN/TAP receive mode that exercises the napi_gro_frags()
interface. This mode is available only in TAP mode, as the interface
expects packets with Ethernet headers.
Furthermore, packets follow the layout of the iovec_iter that was
received. The first iovec is the linear data, and every one after the
first is a fragment. If there are more fragments than the max number,
drop the packet. Additionally, invoke eth_get_headlen() to exercise flow
dissector code and to verify that the header resides in the linear data.
The napi_gro_frags() mode requires setting the IFF_NAPI_FRAGS option.
This is imposed because this mode is intended for testing via tools like
syzkaller and packetdrill, and the increased flexibility it provides can
introduce security vulnerabilities.
Signed-off-by: Petar Penkov
Cc: Eric Dumazet
Cc: Mahesh Bandewar
Cc: Willem de Bruijn
Cc: da...@davemloft.net
Cc: ppen...@stanford.edu
---
drivers/net/tun.c | 135 ++--
include/uapi/linux/if_tun.h | 1 +
2 files changed, 130 insertions(+), 6 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index d5c824e3ec42..2ba9809ab6cd 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -75,6 +75,7 @@
#include
#include
#include
+#include
#include
@@ -120,8 +121,15 @@ do {
\
#define TUN_VNET_LE 0x8000
#define TUN_VNET_BE 0x4000
+#if IS_ENABLED(CONFIG_TUN_NAPI)
+#define TUN_FEATURES_EXTRA IFF_NAPI_FRAGS
+#else
+#define TUN_FEATURES_EXTRA 0
+#endif
+
#define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
- IFF_MULTI_QUEUE)
+ IFF_MULTI_QUEUE | TUN_FEATURES_EXTRA)
+
#define GOODCOPY_LEN 128
#define FLT_EXACT_COUNT 8
@@ -173,6 +181,7 @@ struct tun_file {
unsigned int ifindex;
};
struct napi_struct napi;
+ struct mutex napi_mutex;/* Protects access to the above napi */
struct list_head next;
struct tun_struct *detached;
struct skb_array tx_array;
@@ -276,6 +285,7 @@ static void tun_napi_init(struct tun_struct *tun, struct
tun_file *tfile)
netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
NAPI_POLL_WEIGHT);
napi_enable(&tfile->napi);
+ mutex_init(&tfile->napi_mutex);
}
}
@@ -291,6 +301,11 @@ static void tun_napi_del(struct tun_file *tfile)
netif_napi_del(&tfile->napi);
}
+static bool tun_napi_frags_enabled(const struct tun_struct *tun)
+{
+ return READ_ONCE(tun->flags) & IFF_NAPI_FRAGS;
+}
+
#ifdef CONFIG_TUN_VNET_CROSS_LE
static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
{
@@ -1034,7 +1049,8 @@ static void tun_poll_controller(struct net_device *dev)
* supports polling, which enables bridge devices in virt setups to
* still use netconsole
* If NAPI is enabled, however, we need to schedule polling for all
-* queues.
+* queues unless we are using napi_gro_frags(), which we call in
+* process context and not in NAPI context.
*/
if (IS_ENABLED(CONFIG_TUN_NAPI)) {
@@ -1042,6 +1058,9 @@ static void tun_poll_controller(struct net_device *dev)
struct tun_file *tfile;
int i;
+ if (tun_napi_frags_enabled(tun))
+ return;
+
rcu_read_lock();
for (i = 0; i < tun->numqueues; i++) {
tfile = rcu_dereference(tun->tfiles[i]);
@@ -1264,6 +1283,64 @@ static unsigned int tun_chr_poll(struct file *file,
poll_table *wait)
return mask;
}
+static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
+ size_t len,
+ const struct iov_iter *it)
+{
+ struct sk_buff *skb;
+ size_t linear;
+ int err;
+ int i;
+
+ if (it->nr_segs > MAX_SKB_FRAGS + 1)
+ return ERR_PTR(-ENOMEM);
+
+ local_bh_disable();
+ skb = napi_get_frags(&tfile->napi);
+ local_bh_enable();
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ linear = iov_iter_single_seg_count(it);
+ err = __skb_grow(skb, linear);
+ if (err)
+ goto free;
+
+ skb->len = len;
+ skb->data_len = len - linear;
+ skb->truesize += skb->data_len;
+
+ for (i = 1; i < it->nr_segs; i++) {
+ size_t fragsz = it->iov[i].iov_len;
+ unsigned long offset;
+ struct page *page;
+ void *data;
+
+ if (fragsz == 0 || fragsz > PAGE_SIZE) {
+ err = -EINVAL;
+ goto free;
+ }
+
+ local_bh_disable();
+ data = napi_alloc_frag(fragsz);
+ local_bh_enable(