Author: jhb
Date: Sat Jun 29 00:49:35 2019
New Revision: 349530
URL: https://svnweb.freebsd.org/changeset/base/349530

Log:
  Add support for using unmapped mbufs with sendfile(2).
  
  This can be enabled at runtime via the kern.ipc.mb_use_ext_pgs sysctl.
  It is disabled by default.
  
  Submitted by: gallatin (earlier version)
  Reviewed by:  gallatin, hselasky, rrs
  Relnotes:     yes
  Sponsored by: Netflix
  Differential Revision:        https://reviews.freebsd.org/D20616

Modified:
  head/sys/kern/kern_mbuf.c
  head/sys/kern/kern_sendfile.c
  head/sys/sys/mbuf.h

Modified: head/sys/kern/kern_mbuf.c
==============================================================================
--- head/sys/kern/kern_mbuf.c   Sat Jun 29 00:48:33 2019        (r349529)
+++ head/sys/kern/kern_mbuf.c   Sat Jun 29 00:49:35 2019        (r349530)
@@ -112,6 +112,11 @@ int nmbjumbop;                     /* limits number of page size jumbo c
 int nmbjumbo9;                 /* limits number of 9k jumbo clusters */
 int nmbjumbo16;                        /* limits number of 16k jumbo clusters */
 
+bool mb_use_ext_pgs;           /* use EXT_PGS mbufs for sendfile */
+SYSCTL_BOOL(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLFLAG_RWTUN,
+    &mb_use_ext_pgs, 0,
+    "Use unmapped mbufs for sendfile(2)");
+
 static quad_t maxmbufmem;      /* overall real memory limit for all mbufs */
 
 SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0,

Modified: head/sys/kern/kern_sendfile.c
==============================================================================
--- head/sys/kern/kern_sendfile.c       Sat Jun 29 00:48:33 2019        (r349529)
+++ head/sys/kern/kern_sendfile.c       Sat Jun 29 00:49:35 2019        (r349530)
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/systm.h>
 #include <sys/capsicum.h>
 #include <sys/kernel.h>
+#include <netinet/in.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sysproto.h>
@@ -62,6 +63,7 @@ __FBSDID("$FreeBSD$");
 
 #define        EXT_FLAG_SYNC           EXT_FLAG_VENDOR1
 #define        EXT_FLAG_NOCACHE        EXT_FLAG_VENDOR2
+#define        EXT_FLAG_CACHE_LAST     EXT_FLAG_VENDOR3
 
 /*
  * Structure describing a single sendfile(2) I/O, which may consist of
@@ -201,6 +203,39 @@ sendfile_free_mext(struct mbuf *m)
        }
 }
 
+static void
+sendfile_free_mext_pg(struct mbuf *m)
+{
+       struct mbuf_ext_pgs *ext_pgs;
+       vm_page_t pg;
+       int i;
+       bool nocache, cache_last;
+
+       KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_PGS,
+           ("%s: m %p !M_EXT or !EXT_PGS", __func__, m));
+
+       nocache = m->m_ext.ext_flags & EXT_FLAG_NOCACHE;
+       cache_last = m->m_ext.ext_flags & EXT_FLAG_CACHE_LAST;
+       ext_pgs = m->m_ext.ext_pgs;
+
+       for (i = 0; i < ext_pgs->npgs; i++) {
+               if (cache_last && i == ext_pgs->npgs - 1)
+                       nocache = false;
+               pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
+               sendfile_free_page(pg, nocache);
+       }
+
+       if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {
+               struct sendfile_sync *sfs = m->m_ext.ext_arg2;
+
+               mtx_lock(&sfs->mtx);
+               KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
+               if (--sfs->count == 0)
+                       cv_signal(&sfs->cv);
+               mtx_unlock(&sfs->mtx);
+       }
+}
+
 /*
  * Helper function to calculate how much data to put into page i of n.
  * Only first and last pages are special.
@@ -283,8 +318,6 @@ sendfile_iodone(void *arg, vm_page_t *pg, int count, i
 
        CURVNET_SET(so->so_vnet);
        if (sfio->error) {
-               struct mbuf *m;
-
                /*
                 * I/O operation failed.  The state of data in the socket
                 * is now inconsistent, and all what we can do is to tear
@@ -299,9 +332,7 @@ sendfile_iodone(void *arg, vm_page_t *pg, int count, i
                so->so_proto->pr_usrreqs->pru_abort(so);
                so->so_error = EIO;
 
-               m = sfio->m;
-               for (int i = 0; i < sfio->npages; i++)
-                       m = m_free(m);
+               mb_free_notready(sfio->m, sfio->npages);
        } else
                (void)(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m,
                    sfio->npages);
@@ -540,13 +571,15 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *h
        struct vnode *vp;
        struct vm_object *obj;
        struct socket *so;
+       struct mbuf_ext_pgs *ext_pgs;
        struct mbuf *m, *mh, *mhtail;
        struct sf_buf *sf;
        struct shmfd *shmfd;
        struct sendfile_sync *sfs;
        struct vattr va;
        off_t off, sbytes, rem, obj_size;
-       int error, softerr, bsize, hdrlen;
+       int bsize, error, ext_pgs_idx, hdrlen, max_pgs, softerr;
+       bool use_ext_pgs;
 
        obj = NULL;
        so = NULL;
@@ -554,6 +587,7 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *h
        sfs = NULL;
        hdrlen = sbytes = 0;
        softerr = 0;
+       use_ext_pgs = false;
 
        error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize);
        if (error != 0)
@@ -714,6 +748,17 @@ retry_space:
 
                if (space > rem)
                        space = rem;
+               else if (space > PAGE_SIZE) {
+                       /*
+                        * Use page boundaries when possible for large
+                        * requests.
+                        */
+                       if (off & PAGE_MASK)
+                               space -= (PAGE_SIZE - (off & PAGE_MASK));
+                       space = trunc_page(space);
+                       if (off & PAGE_MASK)
+                               space += (PAGE_SIZE - (off & PAGE_MASK));
+               }
 
                npages = howmany(space + (off & PAGE_MASK), PAGE_SIZE);
 
@@ -751,6 +796,22 @@ retry_space:
                 * dumped into socket buffer.
                 */
                pa = sfio->pa;
+
+               /*
+                * Use unmapped mbufs if enabled for TCP.  Unmapped
+                * bufs are restricted to TCP as that is what has been
+                * tested.  In particular, unmapped mbufs have not
+                * been tested with UNIX-domain sockets.
+                */
+               if (mb_use_ext_pgs &&
+                   so->so_proto->pr_protocol == IPPROTO_TCP) {
+                       use_ext_pgs = true;
+                       max_pgs = MBUF_PEXT_MAX_PGS;
+
+                       /* Start at last index, to wrap on first use. */
+                       ext_pgs_idx = max_pgs - 1;
+               }
+
                for (int i = 0; i < npages; i++) {
                        struct mbuf *m0;
 
@@ -764,6 +825,66 @@ retry_space:
                                npages = i;
                                softerr = EBUSY;
                                break;
+                       }
+
+                       if (use_ext_pgs) {
+                               off_t xfs;
+
+                               ext_pgs_idx++;
+                               if (ext_pgs_idx == max_pgs) {
+                                       m0 = mb_alloc_ext_pgs(M_WAITOK, false,
+                                           sendfile_free_mext_pg);
+
+                                       if (flags & SF_NOCACHE) {
+                                               m0->m_ext.ext_flags |=
+                                                   EXT_FLAG_NOCACHE;
+
+                                               /*
+                                                * See comment below regarding
+                                                * ignoring SF_NOCACHE for the
+                                                * last page.
+                                                */
+                                               if ((npages - i <= max_pgs) &&
+                                                   ((off + space) & PAGE_MASK) &&
+                                                   (rem > space || rhpages > 0))
+                                                       m0->m_ext.ext_flags |=
+                                                           EXT_FLAG_CACHE_LAST;
+                                       }
+                                       if (sfs != NULL) {
+                                               m0->m_ext.ext_flags |=
+                                                   EXT_FLAG_SYNC;
+                                               m0->m_ext.ext_arg2 = sfs;
+                                               mtx_lock(&sfs->mtx);
+                                               sfs->count++;
+                                               mtx_unlock(&sfs->mtx);
+                                       }
+                                       ext_pgs = m0->m_ext.ext_pgs;
+                                       if (i == 0)
+                                               sfio->m = m0;
+                                       ext_pgs_idx = 0;
+
+                                       /* Append to mbuf chain. */
+                                       if (mtail != NULL)
+                                               mtail->m_next = m0;
+                                       else
+                                               m = m0;
+                                       mtail = m0;
+                                       ext_pgs->first_pg_off =
+                                           vmoff(i, off) & PAGE_MASK;
+                               }
+                               if (nios) {
+                                       mtail->m_flags |= M_NOTREADY;
+                                       ext_pgs->nrdy++;
+                               }
+
+                               ext_pgs->pa[ext_pgs_idx] = VM_PAGE_TO_PHYS(pa[i]);
+                               ext_pgs->npgs++;
+                               xfs = xfsize(i, npages, off, space);
+                               ext_pgs->last_pg_len = xfs;
+                               MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs);
+                               mtail->m_len += xfs;
+                               mtail->m_ext.ext_size += PAGE_SIZE;
+                               continue;
                        }
 
                        /*

Modified: head/sys/sys/mbuf.h
==============================================================================
--- head/sys/sys/mbuf.h Sat Jun 29 00:48:33 2019        (r349529)
+++ head/sys/sys/mbuf.h Sat Jun 29 00:49:35 2019        (r349530)
@@ -1129,6 +1129,7 @@ extern int                max_hdr;        /* Largest link + protocol header
 extern int             max_linkhdr;    /* Largest link-level header */
 extern int             max_protohdr;   /* Largest protocol header */
 extern int             nmbclusters;    /* Maximum number of clusters */
+extern bool            mb_use_ext_pgs; /* Use ext_pgs for sendfile */
 
 /*-
  * Network packets may have annotations attached by affixing a list of
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to