From: Björn Töpel
The XDP_MEM_REG socket option allows a process to register a window of
user-space memory with the kernel. This memory will later be used as the
frame data buffer.
Signed-off-by: Björn Töpel
---
include/uapi/linux/if_xdp.h | 7 ++
net/xdp/xsk.c | 294 +++-
net/xdp/xsk.h | 19 ++-
3 files changed, 316 insertions(+), 4 deletions(-)
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index cd09232e16c1..3f8c90c708b4 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -29,4 +29,11 @@ struct sockaddr_xdp {
#define XDP_RX_RING2
#define XDP_TX_RING3
+struct xdp_mr_req {
+ __u64 addr; /* Start address of the packet data area */
+ __u64 len;/* Length of the packet data area */
+ __u32 frame_size; /* Size of one frame in the packet data area */
+ __u32 data_headroom; /* Headroom requested for each frame */
+};
+
#endif /* _LINUX_IF_XDP_H */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 2d7c08a50c60..333ce1450cc7 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -19,18 +19,235 @@
#include
#include
+#include
+#include
+#include
#include
#include
#include "xsk.h"
+#define XSK_UMEM_MIN_FRAME_SIZE 2048 /* smallest frame_size xsk_umem_create() accepts */
+
struct xdp_sock {
/* struct sock must be the first member of struct xdp_sock: xdp_sk() casts a struct sock pointer straight to struct xdp_sock */
struct sock sk;
+ struct xsk_umem *umem; /* NOTE(review): presumably the memory registered via XDP_MEM_REG; assignment not visible here - confirm */
};
+/* Convert a struct sock pointer into the struct xdp_sock that embeds it.
+ * A plain cast is sufficient because sk is the first member of
+ * struct xdp_sock.
+ */
+static struct xdp_sock *xdp_sk(struct sock *sk)
+{
+	struct xdp_sock *xs = (struct xdp_sock *)sk;
+
+	return xs;
+}
+
+/* Release every page recorded in umem->pgs and free the array itself.
+ * Each page is marked dirty before its reference is dropped. Does nothing
+ * when no page array is attached to the umem.
+ */
+static void xsk_umem_unpin_pages(struct xsk_umem *umem)
+{
+	unsigned int idx;
+
+	if (!umem->pgs)
+		return;
+
+	for (idx = 0; idx < umem->npgs; idx++) {
+		struct page *pg = umem->pgs[idx];
+
+		set_page_dirty_lock(pg);
+		put_page(pg);
+	}
+
+	kfree(umem->pgs);
+	umem->pgs = NULL;
+}
+
+/* Tear down a umem: unpin its pages, drop the pid reference, subtract the
+ * umem's page count from the owning mm's pinned_vm and free the descriptor.
+ *
+ * @umem: umem to destroy; NULL is accepted and is a no-op.
+ *
+ * If the task or its mm can no longer be looked up, the pinned_vm
+ * adjustment is skipped and only the descriptor is freed.
+ */
+static void xsk_umem_destroy(struct xsk_umem *umem)
+{
+	struct mm_struct *mm;
+	struct task_struct *task;
+	unsigned long diff;
+
+	if (!umem)
+		return;
+
+	xsk_umem_unpin_pages(umem);
+
+	/* Look the task up first, then drop the pid reference held by the umem. */
+	task = get_pid_task(umem->pid, PIDTYPE_PID);
+	put_pid(umem->pid);
+	if (!task)
+		goto out;
+	mm = get_task_mm(task);
+	put_task_struct(task);
+	if (!mm)
+		goto out;
+
+	/* Number of pages covered by the registered area. */
+	diff = umem->size >> PAGE_SHIFT;
+
+	/* pinned_vm is adjusted with the mm's mmap_sem held for writing. */
+	down_write(&mm->mmap_sem);
+	mm->pinned_vm -= diff;
+	up_write(&mm->mmap_sem);
+	mmput(mm);
+out:
+	kfree(umem);
+}
+
+/* Validate a memory registration request and allocate its umem descriptor.
+ *
+ * @addr: start of the memory area; must be page aligned
+ * @size: length of the memory area; addr + size must not wrap
+ * @frame_size: size of one frame; must be a power of two in the range
+ *              [XSK_UMEM_MIN_FRAME_SIZE, PAGE_SIZE]
+ * @data_headroom: requested per-frame headroom; rounded up to a multiple
+ *                 of 64 and, together with XSK_KERNEL_HEADROOM, must fit
+ *                 inside one frame
+ *
+ * On success a reference to the calling task's pid is taken and stored in
+ * the umem. No pages are pinned here; umem->pgs is left NULL by kzalloc().
+ *
+ * Return: the new umem, ERR_PTR(-EINVAL) if any parameter is rejected, or
+ * ERR_PTR(-ENOMEM) if the descriptor cannot be allocated.
+ */
+static struct xsk_umem *xsk_umem_create(u64 addr, u64 size, u32 frame_size,
+					 u32 data_headroom)
+{
+	struct xsk_umem *umem;
+	u64 nframes;
+	int size_chk;
+
+	if (frame_size < XSK_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
+		/* Strictly speaking we could support this, if:
+		 * - huge pages, or
+		 * - using an IOMMU, or
+		 * - making sure the memory area is consecutive
+		 * but for now, we simply say "computer says no".
+		 */
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!is_power_of_2(frame_size))
+		return ERR_PTR(-EINVAL);
+
+	if (!PAGE_ALIGNED(addr)) {
+		/* Memory area has to be page size aligned. For
+		 * simplicity, this might change.
+		 */
+		return ERR_PTR(-EINVAL);
+	}
+
+	if ((addr + size) < addr)
+		return ERR_PTR(-EINVAL);
+
+	/* frame_size is a power of two, so divide by shifting. This avoids a
+	 * 64-bit division and keeps the full 64-bit quotient, so a frame
+	 * count that does not fit in an unsigned int is rejected instead of
+	 * being silently truncated.
+	 */
+	nframes = size >> ilog2(frame_size);
+	if (nframes == 0 || nframes > UINT_MAX)
+		return ERR_PTR(-EINVAL);
+
+	/* Bound the headroom before touching it: a huge value would wrap in
+	 * ALIGN() or in the unsigned subtraction below and slip through the
+	 * check as a small positive number.
+	 */
+	if (data_headroom > frame_size)
+		return ERR_PTR(-EINVAL);
+
+	data_headroom = ALIGN(data_headroom, 64);
+
+	/* Both operands are now at most PAGE_SIZE (+63), so signed int
+	 * arithmetic cannot overflow.
+	 */
+	size_chk = (int)frame_size - (int)data_headroom -
+		   (int)XSK_KERNEL_HEADROOM;
+	if (size_chk < 0)
+		return ERR_PTR(-EINVAL);
+
+	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+	if (!umem)
+		return ERR_PTR(-ENOMEM);
+
+	umem->pid = get_task_pid(current, PIDTYPE_PID);
+	umem->size = (size_t)size;
+	umem->address = (unsigned long)addr;
+	umem->frame_size = frame_size;
+	umem->nframes = (unsigned int)nframes;
+	umem->data_headroom = data_headroom;
+
+	return umem;
+}
+
+static int xsk_umem_pin_pages(struct xsk_umem *umem)
+{
+ unsigned int gup_flags = FOLL_WRITE;
+ long npgs;
+ int err;
+
+ /* XXX Fix so that we don't always pin.
+* "copy to user" from interrupt context, but how?
+*/
+ umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs), GFP_ATOMIC);
+ if (!umem->pgs)
+ return -ENOMEM;
+
+ npgs = get_user_pages(umem->address, umem->npgs,
+ gup_flags, >pgs[0], NULL);
+ if (npgs != umem->npgs) {
+ if (npgs >= 0) {
+ umem->npgs = npgs;
+