Module Name:	src
Committed By:	jdolecek
Date:		Thu Apr 30 11:23:44 UTC 2020
Modified Files:
	src/sys/arch/xen/xen: xennetback_xenbus.c

Log Message:
add support for scatter-gather when accepting packets on frontend Tx path
(frontend -> backend)

don't enable ETHERCAP_JUMBO_MTU nor feature-sg yet, need to implement
support also for the frontend Rx side


To generate a diff of this commit:
cvs rdiff -u -r1.98 -r1.99 src/sys/arch/xen/xen/xennetback_xenbus.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
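For illustration only (this sketch is not part of the commit): the scatter-gather ring layout the change relies on puts the *total* packet length in the first request's size field, a per-fragment length in each following request, and sets NETTXF_more_data on every slot that is followed by another. The helper below is a standalone C model of the xennetback_tx_m0len_fragment() walk in the diff — struct tx_req, first_frag_len() and the example sizes are hypothetical stand-ins; the NETTXF_more_data value matches Xen's public netif.h.

#include <assert.h>
#include <stdint.h>

#define NETTXF_more_data	(1 << 2)	/* per xen/include/public/io/netif.h */

struct tx_req {				/* reduced netif_tx_request_t */
	uint16_t size;	/* first slot: total length; later slots: fragment length */
	uint16_t flags;
};

/*
 * "chain" points at the request *after* the first one (the first request,
 * already consumed by the caller, carried the total length).  Subtract each
 * later fragment's length; what remains is the first fragment's length.
 * Also count how many ring slots the whole packet occupies.
 */
static int
first_frag_len(const struct tx_req *chain, int total_len, int *slots)
{
	const struct tx_req *r = chain;

	*slots = 1;
	for (;;) {
		total_len -= r->size;	/* peel off a later fragment */
		(*slots)++;
		if ((r->flags & NETTXF_more_data) == 0)
			break;		/* that was the last slot */
		r++;
	}
	return total_len;
}

int
main(void)
{
	/* A 3000-byte packet split over three slots: 1360 + 820 + 820. */
	const struct tx_req chain[] = {
		{ .size = 820, .flags = NETTXF_more_data },	/* 2nd slot */
		{ .size = 820, .flags = 0 },			/* 3rd, last slot */
	};
	int slots;

	int m0_len = first_frag_len(chain, 3000, &slots);
	assert(m0_len == 1360 && slots == 3);
	return 0;
}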
Modified files:

Index: src/sys/arch/xen/xen/xennetback_xenbus.c
diff -u src/sys/arch/xen/xen/xennetback_xenbus.c:1.98 src/sys/arch/xen/xen/xennetback_xenbus.c:1.99
--- src/sys/arch/xen/xen/xennetback_xenbus.c:1.98	Sun Apr 26 13:09:52 2020
+++ src/sys/arch/xen/xen/xennetback_xenbus.c	Thu Apr 30 11:23:44 2020
@@ -1,4 +1,4 @@
-/*      $NetBSD: xennetback_xenbus.c,v 1.98 2020/04/26 13:09:52 jdolecek Exp $      */
+/*      $NetBSD: xennetback_xenbus.c,v 1.99 2020/04/30 11:23:44 jdolecek Exp $      */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xennetback_xenbus.c,v 1.98 2020/04/26 13:09:52 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xennetback_xenbus.c,v 1.99 2020/04/30 11:23:44 jdolecek Exp $");
 
 #include "opt_xen.h"
 
@@ -78,6 +78,7 @@ __KERNEL_RCSID(0, "$NetBSD: xennetback_x
  * transmit at once).
  */
 #define NB_XMIT_PAGES_BATCH 64
+CTASSERT(NB_XMIT_PAGES_BATCH >= XEN_NETIF_NR_SLOTS_MIN);
 
 /* ratecheck(9) for pool allocation failures */
 static const struct timeval xni_pool_errintvl = { 30, 0 };	/* 30s, each */
@@ -91,9 +92,10 @@ typedef enum {
 
 struct xnetback_xstate {
 	bus_dmamap_t xs_dmamap;
+	bool xs_loaded;
 	struct mbuf *xs_m;
-	int xs_id;
-	int xs_flags;
+	struct netif_tx_request xs_tx;
+	uint16_t xs_tx_size;		/* Size of data in this Tx fragment */
 };
 
 /* we keep the xnetback instances in a linked list */
@@ -235,8 +237,9 @@ xennetback_xenbus_create(struct xenbus_d
 
 	/* Initialize DMA map, used only for loading PA */
 	for (i = 0; i < __arraycount(xneti->xni_xstate); i++) {
-		if (bus_dmamap_create(xneti->xni_xbusd->xbusd_dmat, PAGE_SIZE,
-		    1, PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
+		if (bus_dmamap_create(xneti->xni_xbusd->xbusd_dmat,
+		    ETHER_MAX_LEN_JUMBO, XEN_NETIF_NR_SLOTS_MIN,
+		    PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
 		    &xneti->xni_xstate[i].xs_dmamap) != 0) {
 			aprint_error_ifnet(ifp,
@@ -249,7 +252,11 @@ xennetback_xenbus_create(struct xenbus_d
 	/* create pseudo-interface */
 	aprint_verbose_ifnet(ifp, "Ethernet address %s\n",
 	    ether_sprintf(xneti->xni_enaddr));
-	xneti->xni_ec.ec_capabilities |= ETHERCAP_VLAN_MTU;
+	xneti->xni_ec.ec_capabilities |= ETHERCAP_VLAN_MTU
+#ifdef notyet
+	    | ETHERCAP_JUMBO_MTU
+#endif
+	    ;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_snd.ifq_maxlen =
 	    uimax(ifqmaxlen, NET_TX_RING_SIZE * 2);
@@ -309,6 +316,16 @@ xennetback_xenbus_create(struct xenbus_d
 			    xbusd->xbusd_path, err);
 			goto abort_xbt;
 		}
+#if notyet
+		err = xenbus_printf(xbt, xbusd->xbusd_path,
+		    "feature-sg", "%d", 1);
+		if (err) {
+			aprint_error_ifnet(ifp,
+			    "failed to write %s/feature-sg: %d\n",
+			    xbusd->xbusd_path, err);
+			goto abort_xbt;
+		}
+#endif
 	} while ((err = xenbus_transaction_end(xbt, 0)) == EAGAIN);
 	if (err) {
 		aprint_error_ifnet(ifp,
@@ -624,80 +641,182 @@ xennetback_tx_response(struct xnetback_i
 	}
 }
 
-static inline const char *
-xennetback_tx_check_packet(const netif_tx_request_t *txreq, int vlan)
+static const char *
+xennetback_tx_check_packet(const netif_tx_request_t *txreq)
 {
-	if (__predict_false(txreq->size < ETHER_HDR_LEN))
-		return "too small";
-
-	if (__predict_false(txreq->offset + txreq->size > PAGE_SIZE))
-		return "crossing a page boundary";
+	if (__predict_false((txreq->flags & NETTXF_more_data) == 0 &&
+	    txreq->offset + txreq->size > PAGE_SIZE))
+		return "crossing page boundary";
 
-	int maxlen = ETHER_MAX_LEN - ETHER_CRC_LEN;
-	if (vlan)
-		maxlen += ETHER_VLAN_ENCAP_LEN;
-	if (__predict_false(txreq->size > maxlen))
-		return "too big";
-
-	/* Somewhat duplicit, MCLBYTES is > ETHER_MAX_LEN */
-	if (__predict_false(txreq->size > MCLBYTES))
-		return "bigger than MCLBYTES";
+	if (__predict_false(txreq->size > ETHER_MAX_LEN_JUMBO))
+		return "bigger then jumbo";
 
 	return NULL;
 }
 
-static void
-xennetback_tx_copy_process(struct ifnet *ifp, struct xnetback_instance *xneti,
-    int queued)
+static int
+xennetback_copy(struct ifnet *ifp, gnttab_copy_t *gop, int copycnt)
 {
-	int i = 0;
-	gnttab_copy_t *gop;
-	struct xnetback_xstate *xst;
-
 	/*
 	 * Copy the data and ack it. Delaying it until the mbuf is
 	 * freed will stall transmit.
 	 */
-	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xneti->xni_gop_copy,
-	    queued) != 0) {
+	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, gop, copycnt) != 0) {
 		printf("%s: GNTTABOP_copy Tx failed", ifp->if_xname);
-		goto abort;
+		return EINVAL;
 	}
 
-	for (; i < queued; i++) {
-		gop = &xneti->xni_gop_copy[i];
-		xst = &xneti->xni_xstate[i];
-
+	for (int i = 0; i < copycnt; i++) {
 		if (gop->status != GNTST_okay) {
 			printf("%s GNTTABOP_copy[%d] Tx %d\n",
 			    ifp->if_xname, i, gop->status);
-			goto abort;
+			return EINVAL;
 		}
+	}
 
-		xennetback_tx_response(xneti, xst->xs_id, NETIF_RSP_OKAY);
+	return 0;
+}
 
-		if (xst->xs_flags & NETTXF_csum_blank)
-			xennet_checksum_fill(ifp, xst->xs_m);
-		else if (xst->xs_flags & NETTXF_data_validated)
-			xst->xs_m->m_pkthdr.csum_flags = XN_M_CSUM_SUPPORTED;
-		m_set_rcvif(xst->xs_m, ifp);
+static void
+xennetback_tx_copy_abort(struct ifnet *ifp, struct xnetback_instance *xneti,
+    int queued)
+{
+	struct xnetback_xstate *xst;
 
-		if_percpuq_enqueue(ifp->if_percpuq, xst->xs_m);
+	for (int i = 0; i < queued; i++) {
+		xst = &xneti->xni_xstate[i];
 
-		bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
-		    xst->xs_dmamap);
+		if (xst->xs_loaded) {
+			KASSERT(xst->xs_m != NULL);
+			bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
+			    xst->xs_dmamap);
+			xst->xs_loaded = false;
+			m_freem(xst->xs_m);
+		}
+
+		xennetback_tx_response(xneti, xst->xs_tx.id, NETIF_RSP_ERROR);
+		if_statinc(ifp, if_ierrors);
 	}
+}
 
-	return;
+static void
+xennetback_tx_copy_process(struct ifnet *ifp, struct xnetback_instance *xneti,
+    int queued)
+{
+	gnttab_copy_t *gop;
+	struct xnetback_xstate *xst;
+	int copycnt = 0, seg = 0;
+	size_t goff = 0, segoff = 0, gsize, take;
+	bus_dmamap_t dm = NULL;
+	paddr_t ma;
 
-abort:
-	for (; i < queued; i++) {
+	for (int i = 0; i < queued; i++) {
 		xst = &xneti->xni_xstate[i];
 
-		m_freem(xst->xs_m);
-		xennetback_tx_response(xneti, xst->xs_id, NETIF_RSP_ERROR);
-		if_statinc(ifp, if_ierrors);
+		if (xst->xs_m != NULL) {
+			KASSERT(xst->xs_m->m_pkthdr.len == xst->xs_tx.size);
+			if (__predict_false(bus_dmamap_load_mbuf(
+			    xneti->xni_xbusd->xbusd_dmat,
+			    xst->xs_dmamap, xst->xs_m, BUS_DMA_NOWAIT) != 0))
+				goto abort;
+			xst->xs_loaded = true;
+			dm = xst->xs_dmamap;
+			seg = 0;
+			goff = segoff = 0;
+		}
+
+		gsize = xst->xs_tx_size;
+		for (; seg < dm->dm_nsegs && gsize > 0; seg++) {
+			bus_dma_segment_t *ds = &dm->dm_segs[seg];
+			ma = ds->ds_addr;
+			take = uimin(gsize, ds->ds_len);
+
+			KASSERT(copycnt <= NB_XMIT_PAGES_BATCH);
+			if (copycnt == NB_XMIT_PAGES_BATCH) {
+				if (xennetback_copy(ifp, xneti->xni_gop_copy,
+				    copycnt) != 0)
+					goto abort;
+				copycnt = 0;
+			}
+
+			/* Queue for the copy */
+			gop = &xneti->xni_gop_copy[copycnt++];
+			memset(gop, 0, sizeof(*gop));
+			gop->flags = GNTCOPY_source_gref;
+			gop->len = take;
+
+			gop->source.u.ref = xst->xs_tx.gref;
+			gop->source.offset = xst->xs_tx.offset + goff;
+			gop->source.domid = xneti->xni_domid;
+
+			gop->dest.offset = (ma & PAGE_MASK) + segoff;
+			KASSERT(gop->dest.offset <= PAGE_SIZE);
+			gop->dest.domid = DOMID_SELF;
+			gop->dest.u.gmfn = ma >> PAGE_SHIFT;
+
+			goff += take;
+			gsize -= take;
+			if (take + segoff < ds->ds_len) {
+				segoff += take;
+				/* Segment not completely consumed yet */
+				break;
+			}
+			segoff = 0;
+		}
+		KASSERT(gsize == 0);
+	}
+	if (copycnt > 0) {
+		if (xennetback_copy(ifp, xneti->xni_gop_copy, copycnt) != 0)
+			goto abort;
+		copycnt = 0;
 	}
+
+	/* If we got here, the whole copy was successful */
+	for (int i = 0; i < queued; i++) {
+		xst = &xneti->xni_xstate[i];
+
+		xennetback_tx_response(xneti, xst->xs_tx.id, NETIF_RSP_OKAY);
+
+		if (xst->xs_m != NULL) {
+			KASSERT(xst->xs_loaded);
+			bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
+			    xst->xs_dmamap);
+
+			if (xst->xs_tx.flags & NETTXF_csum_blank)
+				xennet_checksum_fill(ifp, xst->xs_m);
+			else if (xst->xs_tx.flags & NETTXF_data_validated) {
+				xst->xs_m->m_pkthdr.csum_flags =
+				    XN_M_CSUM_SUPPORTED;
+			}
+			m_set_rcvif(xst->xs_m, ifp);
+
+			if_percpuq_enqueue(ifp->if_percpuq, xst->xs_m);
+		}
+	}
+
+	return;
+
+abort:
+	xennetback_tx_copy_abort(ifp, xneti, queued);
+}
+
+static int
+xennetback_tx_m0len_fragment(struct xnetback_instance *xneti,
+    int m0_len, int req_cons, int *cntp)
+{
+	netif_tx_request_t *txreq;
+
+	/* This assumes all the requests are already pushed into the ring */
+	*cntp = 1;
+	do {
+		txreq = RING_GET_REQUEST(&xneti->xni_txring, req_cons);
+		KASSERT(m0_len > txreq->size);
+		m0_len -= txreq->size;
+		req_cons++;
+		(*cntp)++;
+	} while (txreq->flags & NETTXF_more_data);
+
+	return m0_len;
 }
 
 static int
@@ -706,17 +825,16 @@ xennetback_evthandler(void *arg)
 	struct xnetback_instance *xneti = arg;
 	struct ifnet *ifp = &xneti->xni_if;
 	netif_tx_request_t txreq;
-	struct mbuf *m;
+	struct mbuf *m, *m0 = NULL, *mlast = NULL;
 	int receive_pending;
 	RING_IDX req_cons;
-	gnttab_copy_t *gop;
-	paddr_t ma;
-	int queued = 0;
+	int queued = 0, m0_len = 0;
 	struct xnetback_xstate *xst;
+	const bool discard = ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
+	    (IFF_UP | IFF_RUNNING));
 
 	XENPRINTF(("xennetback_evthandler "));
 	req_cons = xneti->xni_txring.req_cons;
-	xen_rmb();
 	while (1) {
 		xen_rmb(); /* be sure to read the request before updating */
 		xneti->xni_txring.req_cons = req_cons;
@@ -725,24 +843,24 @@ xennetback_evthandler(void *arg)
 		    receive_pending);
 		if (receive_pending == 0)
 			break;
-		RING_COPY_REQUEST(&xneti->xni_txring, req_cons, &txreq);
+		RING_COPY_REQUEST(&xneti->xni_txring, req_cons,
+		    &txreq);
 		xen_rmb();
 		XENPRINTF(("%s pkt size %d\n", xneti->xni_if.if_xname,
 		    txreq.size));
 		req_cons++;
-		if (__predict_false((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
-		    (IFF_UP | IFF_RUNNING))) {
-			/* interface not up, drop */
+		if (__predict_false(discard)) {
+			/* interface not up, drop all requests */
+			if_statinc(ifp, if_iqdrops);
 			xennetback_tx_response(xneti, txreq.id,
 			    NETIF_RSP_DROPPED);
 			continue;
 		}
 
 		/*
-		 * Do some sanity checks, and map the packet's page.
+		 * Do some sanity checks, and queue copy of the data.
 		 */
-		const char *msg = xennetback_tx_check_packet(&txreq,
-		    xneti->xni_ec.ec_capenable & ETHERCAP_VLAN_MTU);
+		const char *msg = xennetback_tx_check_packet(&txreq);
 		if (__predict_false(msg != NULL)) {
 			printf("%s: packet with size %d is %s\n",
 			    ifp->if_xname, txreq.size, msg);
@@ -752,79 +870,129 @@ xennetback_evthandler(void *arg)
 			continue;
 		}
 
-		/* get a mbuf for this packet */
+		/* get a mbuf for this fragment */
 		MGETHDR(m, M_DONTWAIT, MT_DATA);
 		if (__predict_false(m == NULL)) {
 			static struct timeval lasttime;
+mbuf_fail:
 			if (ratecheck(&lasttime, &xni_pool_errintvl))
 				printf("%s: mbuf alloc failed\n",
 				    ifp->if_xname);
+			xennetback_tx_copy_abort(ifp, xneti, queued);
+			queued = 0;
+			m0 = NULL;
 			xennetback_tx_response(xneti, txreq.id,
 			    NETIF_RSP_DROPPED);
 			if_statinc(ifp, if_ierrors);
 			continue;
 		}
 
-		if (txreq.size > MHLEN) {
+		m->m_len = m->m_pkthdr.len = txreq.size;
+
+		if (!m0 && (txreq.flags & NETTXF_more_data)) {
+			/*
+			 * The first fragment of multi-fragment Tx request
+			 * contains total size. Need to read whole
+			 * chain to determine actual size of the first
+			 * (i.e. current) fragment.
+			 */
+			int cnt;
+			m0_len = xennetback_tx_m0len_fragment(xneti,
+			    txreq.size, req_cons, &cnt);
+			m->m_len = m0_len;
+			KASSERT(cnt <= XEN_NETIF_NR_SLOTS_MIN);
+
+			if (queued + cnt >= NB_XMIT_PAGES_BATCH) {
+				/*
+				 * Flush queue if too full to fit this
+				 * new packet whole.
+				 */
+				xennetback_tx_copy_process(ifp, xneti, queued);
+				queued = 0;
+			}
+		}
+
+		if (m->m_len > MHLEN) {
 			MCLGET(m, M_DONTWAIT);
-			if (__predict_false(m->m_ext_storage.ext_buf == NULL)) {
+			if (__predict_false((m->m_flags & M_EXT) == 0)) {
 				m_freem(m);
-				xennetback_tx_response(xneti, txreq.id,
-				    NETIF_RSP_DROPPED);
-				if_statinc(ifp, if_ierrors);
-				continue;
+				goto mbuf_fail;
+			}
+			if (__predict_false(m->m_len > MCLBYTES)) {
+				/* one more mbuf necessary */
+				struct mbuf *mn;
+				MGET(mn, M_DONTWAIT, MT_DATA);
+				if (__predict_false(mn == NULL)) {
+					m_freem(m);
+					goto mbuf_fail;
+				}
+				if (m->m_len - MCLBYTES > MLEN) {
+					MCLGET(mn, M_DONTWAIT);
+					if ((mn->m_flags & M_EXT) == 0) {
+						m_freem(mn);
+						m_freem(m);
+						goto mbuf_fail;
+					}
+				}
+				mn->m_len = m->m_len - MCLBYTES;
+				m->m_len = MCLBYTES;
+				m->m_next = mn;
+				KASSERT(mn->m_len <= MCLBYTES);
+			}
+			KASSERT(m->m_len <= MCLBYTES);
+		}
+
+		if (m0 || (txreq.flags & NETTXF_more_data)) {
+			if (m0 == NULL) {
+				m0 = m;
+				mlast = (m->m_next) ? m->m_next : m;
+				KASSERT(mlast->m_next == NULL);
+			} else {
+				/* Coalesce like m_cat(), but without copy */
+				KASSERT(mlast != NULL);
+				if (M_TRAILINGSPACE(mlast) >= m->m_pkthdr.len) {
+					mlast->m_len += m->m_pkthdr.len;
+					m_freem(m);
+				} else {
+					mlast->m_next = m;
+					mlast = (m->m_next) ? m->m_next : m;
+					KASSERT(mlast->m_next == NULL);
+				}
 			}
 		}
 
-		m->m_len = m->m_pkthdr.len = txreq.size;
-
 		XENPRINTF(("%s pkt offset %d size %d id %d req_cons %d\n",
 		    xneti->xni_if.if_xname, txreq.offset,
 		    txreq.size, txreq.id, MASK_NETIF_TX_IDX(req_cons)));
 
 		xst = &xneti->xni_xstate[queued];
-		xst->xs_m = m;
-		xst->xs_id = txreq.id;
-		xst->xs_flags = txreq.flags;
-
-		if (bus_dmamap_load_mbuf(xneti->xni_xbusd->xbusd_dmat,
-		    xst->xs_dmamap, m, BUS_DMA_NOWAIT) != 0) {
-			printf("%s: mbuf load failed\n", ifp->if_xname);
-			m_freem(m);
-			xennetback_tx_response(xneti, txreq.id,
-			    NETIF_RSP_DROPPED);
-			if_statinc(ifp, if_ierrors);
-			continue;
-		}
-		ma = xst->xs_dmamap->dm_segs[0].ds_addr;
-
-		/* Queue for the copy */
-		gop = &xneti->xni_gop_copy[queued];
-		memset(gop, 0, sizeof(*gop));
-		gop->flags = GNTCOPY_source_gref;
-		gop->len = txreq.size;
-
-		gop->source.u.ref = txreq.gref;
-		gop->source.offset = txreq.offset;
-		gop->source.domid = xneti->xni_domid;
-
-		gop->dest.offset = ma & PAGE_MASK;
-		gop->dest.domid = DOMID_SELF;
-		gop->dest.u.gmfn = ma >> PAGE_SHIFT;
-
-		m->m_len = m->m_pkthdr.len = txreq.size;
-
+		xst->xs_m = (m0 == NULL || m == m0) ? m : NULL;
+		xst->xs_tx = txreq;
+		/* Fill the length of _this_ fragment */
+		xst->xs_tx_size = (m == m0) ? m0_len : m->m_pkthdr.len;
 		queued++;
+
 		KASSERT(queued <= NB_XMIT_PAGES_BATCH);
+		if (__predict_false(m0 &&
+		    (txreq.flags & NETTXF_more_data) == 0)) {
+			/* Last fragment, stop appending mbufs */
+			m0 = NULL;
+		}
 		if (queued == NB_XMIT_PAGES_BATCH) {
+			KASSERT(m0 == NULL);
 			xennetback_tx_copy_process(ifp, xneti, queued);
 			queued = 0;
 		}
 	}
+	if (m0) {
+		/* Queue empty, and still unfinished multi-fragment request */
+		printf("%s: dropped unfinished multi-fragment\n",
+		    ifp->if_xname);
+		xennetback_tx_copy_abort(ifp, xneti, queued);
+		queued = 0;
+		m0 = NULL;
+	}
 	if (queued > 0)
 		xennetback_tx_copy_process(ifp, xneti, queued);
-	xen_rmb(); /* be sure to read the request before updating pointer */
-	xneti->xni_txring.req_cons = req_cons;
-	xen_wmb();
 
 	/* check to see if we can transmit more packets */
 	if_schedule_deferred_start(ifp);
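Also for illustration only (not part of the commit): the heart of the new xennetback_tx_copy_process() above is turning one granted Tx fragment into a batch of hypervisor copy operations, one per destination DMA segment — the dmamap is created with PAGE_SIZE maxsegsz and boundary, so a segment never crosses a page. The sketch below models just that batching loop; struct copy_op, struct seg, queue_fragment() and flush_noop() are hypothetical stand-ins for gnttab_copy_t, bus_dma_segment_t and xennetback_copy(), 4 KiB pages are assumed, and the per-segment carry-over (segoff) that lets consecutive fragments of one packet share a segment is omitted.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define BATCH	64			/* mirrors NB_XMIT_PAGES_BATCH */

struct copy_op {			/* stand-in for gnttab_copy_t */
	uint32_t src_gref;		/* frontend grant reference */
	uint32_t src_off;		/* offset within the granted page */
	uint64_t dst_pfn;		/* backend destination page frame */
	uint32_t dst_off;
	uint32_t len;
};

struct seg {				/* stand-in for bus_dma_segment_t */
	uint64_t addr;			/* physical address, within one page */
	size_t len;
};

/* A real flush would issue HYPERVISOR_grant_table_op(GNTTABOP_copy, ...) */
typedef int (*flush_fn)(struct copy_op *, int);

/*
 * Queue copy ops for one fragment of "size" bytes granted at (gref, offset),
 * landing in seg[0..nsegs).  Flushes the batch whenever it fills up.
 * Returns the number of ops still pending, or -1 on a flush error.
 * The caller guarantees the segments cover "size" bytes (the real code
 * asserts this with KASSERT(gsize == 0)).
 */
static int
queue_fragment(struct copy_op *ops, int pending, flush_fn flush,
    uint32_t gref, uint32_t offset, size_t size,
    const struct seg *seg, int nsegs)
{
	size_t goff = 0;		/* progress within the source grant */

	for (int s = 0; s < nsegs && size > 0; s++) {
		size_t take = size < seg[s].len ? size : seg[s].len;

		if (pending == BATCH) {		/* batch full: flush it */
			if (flush(ops, pending) != 0)
				return -1;
			pending = 0;
		}

		struct copy_op *op = &ops[pending++];
		op->src_gref = gref;
		op->src_off = offset + goff;
		op->dst_pfn = seg[s].addr >> 12;	/* PAGE_SHIFT */
		op->dst_off = seg[s].addr & 0xfff;	/* PAGE_MASK */
		op->len = take;

		goff += take;
		size -= take;
	}
	return pending;
}

static int
flush_noop(struct copy_op *ops, int n)
{
	(void)ops;
	printf("flushed %d copy op(s)\n", n);
	return 0;
}

int
main(void)
{
	struct copy_op ops[BATCH];
	/* A 3000-byte fragment landing in two DMA segments (2048 + 952). */
	const struct seg segs[] = {
		{ .addr = 0x10000, .len = 2048 },
		{ .addr = 0x23800, .len = 952 },
	};
	int pending;

	pending = queue_fragment(ops, 0, flush_noop,
	    /* gref */ 7, /* offset */ 64, /* size */ 3000, segs, 2);
	assert(pending == 2);		/* one op per destination segment */
	return flush_noop(ops, pending) == 0 ? 0 : 1;
}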