Author: scottl
Date: Thu Sep 20 01:23:54 2012
New Revision: 240718
URL: http://svn.freebsd.org/changeset/base/240718

Log:
  Sync the ixgbe driver from HEAD to stable/9
  
  r236627 - Fix driver deadlock due to OACTIVE flag
  r236729 - Fix prefetch programming typo
  r239940 - Improve small RX packet performance
  r240155 - Fix missing braces in PHY configuration
  r240366 - Remove a prefetch directive that hurts performance
  
  Approved by:  jfv
  Obtained from:        Netflix, inc.

Modified:
  stable/9/sys/dev/ixgbe/ixgbe.c
  stable/9/sys/dev/ixgbe/ixgbe.h
  stable/9/sys/dev/ixgbe/ixgbe_osdep.h
Directory Properties:
  stable/9/sys/dev/ixgbe/   (props changed)

Modified: stable/9/sys/dev/ixgbe/ixgbe.c
==============================================================================
--- stable/9/sys/dev/ixgbe/ixgbe.c      Thu Sep 20 00:51:09 2012        (r240717)
+++ stable/9/sys/dev/ixgbe/ixgbe.c      Thu Sep 20 01:23:54 2012        (r240718)
@@ -1145,7 +1145,7 @@ ixgbe_init_locked(struct adapter *adapte
                 * from the Intel linux driver 3.8.21.
                 * Prefetching enables tx line rate even with 1 queue.
                 */
-               txdctl |= (16 << 0) | (1 << 8);
+               txdctl |= (32 << 0) | (1 << 8);
                IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
        }
 
@@ -1390,7 +1390,7 @@ ixgbe_handle_que(void *context, int pend
                        ixgbe_start_locked(txr, ifp);
 #endif
                IXGBE_TX_UNLOCK(txr);
-               if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
+               if (more) {
                        taskqueue_enqueue(que->tq, &que->que_task);
                        return;
                }
@@ -3698,21 +3698,30 @@ no_split:
                        mp = rxbuf->m_pack;
 
                mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
-               /* Get the memory mapping */
-               error = bus_dmamap_load_mbuf_sg(rxr->ptag,
-                   rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
-               if (error != 0) {
-                       printf("Refresh mbufs: payload dmamap load"
-                           " failure - %d\n", error);
-                       m_free(mp);
-                       rxbuf->m_pack = NULL;
-                       goto update;
+
+               /* If we're dealing with an mbuf that was copied rather
+                * than replaced, there's no need to go through busdma.
+                */
+               if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
+                       /* Get the memory mapping */
+                       error = bus_dmamap_load_mbuf_sg(rxr->ptag,
+                           rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
+                       if (error != 0) {
+                               printf("Refresh mbufs: payload dmamap load"
+                                   " failure - %d\n", error);
+                               m_free(mp);
+                               rxbuf->m_pack = NULL;
+                               goto update;
+                       }
+                       rxbuf->m_pack = mp;
+                       bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
+                           BUS_DMASYNC_PREREAD);
+                       rxbuf->paddr = rxr->rx_base[i].read.pkt_addr =
+                           htole64(pseg[0].ds_addr);
+               } else {
+                       rxr->rx_base[i].read.pkt_addr = rxbuf->paddr;
+                       rxbuf->flags &= ~IXGBE_RX_COPY;
                }
-               rxbuf->m_pack = mp;
-               bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
-                   BUS_DMASYNC_PREREAD);
-               rxr->rx_base[i].read.pkt_addr =
-                   htole64(pseg[0].ds_addr);
 
                refreshed = TRUE;
                /* Next is precalculated */
@@ -4025,6 +4034,7 @@ skip_head:
        rxr->next_to_refresh = 0;
        rxr->lro_enabled = FALSE;
        rxr->rx_split_packets = 0;
+       rxr->rx_copies = 0;
        rxr->rx_bytes = 0;
        rxr->discard = FALSE;
        rxr->vtag_strip = FALSE;
@@ -4580,14 +4590,36 @@ ixgbe_rxeof(struct ix_queue *que, int co
                        ** that determines what we are
                        */
                        sendmp = rbuf->fmp;
-                       rbuf->m_pack = rbuf->fmp = NULL;
 
                        if (sendmp != NULL) {  /* secondary frag */
+                               rbuf->m_pack = rbuf->fmp = NULL;
                                mp->m_flags &= ~M_PKTHDR;
                                sendmp->m_pkthdr.len += mp->m_len;
                        } else {
+                               /*
+                                * Optimize.  This might be a small packet,
+                                * maybe just a TCP ACK.  Do a fast copy that
+                                * is cache aligned into a new mbuf, and
+                                * leave the old mbuf+cluster for re-use.
+                                */
+                               if (eop && plen <= IXGBE_RX_COPY_LEN) {
+                                       sendmp = m_gethdr(M_DONTWAIT, MT_DATA);
+                                       if (sendmp != NULL) {
+                                               sendmp->m_data +=
+                                                   IXGBE_RX_COPY_ALIGN;
+                                               ixgbe_bcopy(mp->m_data,
+                                                   sendmp->m_data, plen);
+                                               sendmp->m_len = plen;
+                                               rxr->rx_copies++;
+                                               rbuf->flags |= IXGBE_RX_COPY;
+                                       }
+                               }
+                               if (sendmp == NULL) {
+                                       rbuf->m_pack = rbuf->fmp = NULL;
+                                       sendmp = mp;
+                               }
+
                                /* first desc of a non-ps chain */
-                               sendmp = mp;
                                sendmp->m_flags |= M_PKTHDR;
                                sendmp->m_pkthdr.len = mp->m_len;
                                if (staterr & IXGBE_RXD_STAT_VP) {
@@ -5438,6 +5470,9 @@ ixgbe_add_hw_stats(struct adapter *adapt
                SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
                                CTLFLAG_RD, &rxr->rx_bytes,
                                "Queue Bytes Received");
+               SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies",
+                               CTLFLAG_RD, &rxr->rx_copies,
+                               "Copied RX Frames");
                SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
                                CTLFLAG_RD, &lro->lro_queued, 0,
                                "LRO Queued");

Modified: stable/9/sys/dev/ixgbe/ixgbe.h
==============================================================================
--- stable/9/sys/dev/ixgbe/ixgbe.h      Thu Sep 20 00:51:09 2012        (r240717)
+++ stable/9/sys/dev/ixgbe/ixgbe.h      Thu Sep 20 01:23:54 2012        (r240718)
@@ -154,6 +154,19 @@
 #define IXGBE_FC_HI            0x20000
 #define IXGBE_FC_LO            0x10000
 
+/*
+ * Used for optimizing small rx mbufs.  Effort is made to keep the copy
+ * small and aligned for the CPU L1 cache.
+ * 
+ * MHLEN is typically 168 bytes, giving us 8-byte alignment.  Getting
+ * 32 byte alignment needed for the fast bcopy results in 8 bytes being
+ * wasted.  Getting 64 byte alignment, which _should_ be ideal for
+ * modern Intel CPUs, results in 40 bytes wasted and a significant drop
+ * in observed efficiency of the optimization, 97.9% -> 81.8%.
+ */
+#define IXGBE_RX_COPY_LEN      160
+#define IXGBE_RX_COPY_ALIGN    (MHLEN - IXGBE_RX_COPY_LEN)
+
 /* Keep older OS drivers building... */
 #if !defined(SYSCTL_ADD_UQUAD)
 #define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD
@@ -245,6 +258,9 @@ struct ixgbe_rx_buf {
        struct mbuf     *fmp;
        bus_dmamap_t    hmap;
        bus_dmamap_t    pmap;
+       u_int           flags;
+#define IXGBE_RX_COPY  0x01
+       uint64_t        paddr;
 };
 
 /*
@@ -339,6 +355,7 @@ struct rx_ring {
        /* Soft stats */
        u64                     rx_irq;
        u64                     rx_split_packets;
+       u64                     rx_copies;
        u64                     rx_packets;
        u64                     rx_bytes;
        u64                     rx_discarded;

Modified: stable/9/sys/dev/ixgbe/ixgbe_osdep.h
==============================================================================
--- stable/9/sys/dev/ixgbe/ixgbe_osdep.h        Thu Sep 20 00:51:09 2012        (r240717)
+++ stable/9/sys/dev/ixgbe/ixgbe_osdep.h        Thu Sep 20 01:23:54 2012        (r240718)
@@ -143,6 +143,25 @@ void prefetch(void *x)
 #define prefetch(x)
 #endif
 
+/*
+ * Optimized bcopy thanks to Luigi Rizzo's investigative work.  Assumes
+ * non-overlapping regions and 32-byte padding on both src and dst.
+ */
+static __inline int
+ixgbe_bcopy(void *_src, void *_dst, int l)
+{
+       uint64_t *src = _src;
+       uint64_t *dst = _dst;
+
+       for (; l > 0; l -= 32) {
+               *dst++ = *src++;
+               *dst++ = *src++;
+               *dst++ = *src++;
+               *dst++ = *src++;
+       }
+       return (0);
+}
+
 struct ixgbe_osdep
 {
        bus_space_tag_t    mem_bus_space_tag;
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to