Module Name: src
Committed By: jdolecek
Date: Wed Apr 8 21:51:43 UTC 2020
Modified Files:
src/sys/dev/pci: if_wm.c
Log Message:
Always writing the checksum offload context descriptor actually
makes the hw do extra processing; avoid doing that when possible -
on my computer with I219 this results in about a 2% speedup in Tx performance.
Change adopted from FreeBSD.
XXX This should also be done for the multiqueue case, but I don't have hw to test it.
To generate a diff of this commit:
cvs rdiff -u -r1.670 -r1.671 src/sys/dev/pci/if_wm.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/dev/pci/if_wm.c
diff -u src/sys/dev/pci/if_wm.c:1.670 src/sys/dev/pci/if_wm.c:1.671
--- src/sys/dev/pci/if_wm.c:1.670 Sat Mar 21 16:47:05 2020
+++ src/sys/dev/pci/if_wm.c Wed Apr 8 21:51:42 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: if_wm.c,v 1.670 2020/03/21 16:47:05 thorpej Exp $ */
+/* $NetBSD: if_wm.c,v 1.671 2020/04/08 21:51:42 jdolecek Exp $ */
/*
* Copyright (c) 2001, 2002, 2003, 2004 Wasabi Systems, Inc.
@@ -82,7 +82,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_wm.c,v 1.670 2020/03/21 16:47:05 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_wm.c,v 1.671 2020/04/08 21:51:42 jdolecek Exp $");
#ifdef _KERNEL_OPT
#include "opt_net_mpsafe.h"
@@ -379,6 +379,12 @@ struct wm_txqueue {
bool txq_sending;
time_t txq_lastsent;
+ /* Checksum flags used for previous packet */
+ uint32_t txq_last_hw_cmd;
+ uint8_t txq_last_hw_fields;
+ uint16_t txq_last_hw_ipcs;
+ uint16_t txq_last_hw_tucs;
+
uint32_t txq_packets; /* for AIM */
uint32_t txq_bytes; /* for AIM */
#ifdef WM_EVENT_COUNTERS
@@ -403,6 +409,7 @@ struct wm_txqueue {
WM_Q_EVCNT_DEFINE(txq, toomanyseg) /* Pkt dropped(toomany DMA segs) */
WM_Q_EVCNT_DEFINE(txq, defrag) /* m_defrag() */
WM_Q_EVCNT_DEFINE(txq, underrun) /* Tx underrun */
+ WM_Q_EVCNT_DEFINE(txq, skipcontext) /* Tx skip writing cksum context */
char txq_txseg_evcnt_names[WM_NTXSEGS][sizeof("txqXXtxsegXXX")];
struct evcnt txq_ev_txseg[WM_NTXSEGS]; /* Tx packets w/ N segments */
@@ -6947,6 +6954,7 @@ wm_alloc_txrx_queues(struct wm_softc *sc
WM_Q_MISC_EVCNT_ATTACH(txq, toomanyseg, txq, i, xname);
WM_Q_MISC_EVCNT_ATTACH(txq, defrag, txq, i, xname);
WM_Q_MISC_EVCNT_ATTACH(txq, underrun, txq, i, xname);
+ WM_Q_MISC_EVCNT_ATTACH(txq, skipcontext, txq, i, xname);
#endif /* WM_EVENT_COUNTERS */
tx_done++;
@@ -7079,6 +7087,7 @@ wm_free_txrx_queues(struct wm_softc *sc)
WM_Q_EVCNT_DETACH(txq, toomanyseg, txq, i);
WM_Q_EVCNT_DETACH(txq, defrag, txq, i);
WM_Q_EVCNT_DETACH(txq, underrun, txq, i);
+ WM_Q_EVCNT_DETACH(txq, skipcontext, txq, i);
#endif /* WM_EVENT_COUNTERS */
/* Drain txq_interq */
@@ -7394,6 +7403,9 @@ wm_tx_offload(struct wm_softc *sc, struc
default:
/* Don't support this protocol or encapsulation. */
+ txq->txq_last_hw_cmd = txq->last_hw_fields = 0;
+ txq->txq_last_hw_ipcs = 0;
+ txq->txq_last_hw_tucs = 0;
*fieldsp = 0;
*cmdp = 0;
return 0;
@@ -7533,13 +7545,47 @@ wm_tx_offload(struct wm_softc *sc, struc
WTX_TCPIP_TUCSE(0) /* Rest of packet */;
}
+ *cmdp = cmd;
+ *fieldsp = fields;
+
/*
* We don't have to write context descriptor for every packet
* except for 82574. For 82574, we must write context descriptor
* for every packet when we use two descriptor queues.
- * It would be overhead to write context descriptor for every packet,
- * however it does not cause problems.
- */
+ *
+ * The 82574L can only remember the *last* context used,
+ * regardless of the queue that it was used for. We cannot reuse
+ * contexts on this hardware platform and must generate a new
+ * context every time. See the 82574L hardware spec, section 7.2.6,
+ * second note.
+ *
+ * Setting up a new checksum offload context for every
+ * frame takes a lot of processing time for the hardware.
+ * This also reduces performance a lot for small-sized
+ * frames, so avoid it if the driver can reuse the previously
+ * configured checksum offload context.
+ * For TSO, in theory we can use the same TSO context if and only if
+ * the frame is the same type (IP/TCP) and has the same MSS. However,
+ * checking whether a frame has the same IP/TCP structure is
+ * a hard thing, so just ignore that and always reestablish a
+ * new TSO context.
+ */
+ KASSERT(!wm_is_using_multiqueue(sc));
+ if ((m0->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) == 0) {
+ if (txq->txq_last_hw_cmd == cmd &&
+ txq->txq_last_hw_fields == fields &&
+ txq->txq_last_hw_ipcs == (ipcs & 0xffff) &&
+ txq->txq_last_hw_tucs == (tucs & 0xffff)) {
+ WM_Q_EVCNT_INCR(txq, skipcontext);
+ return 0;
+ }
+ }
+
+ txq->txq_last_hw_cmd = cmd;
+ txq->txq_last_hw_fields = fields;
+ txq->txq_last_hw_ipcs = (ipcs & 0xffff);
+ txq->txq_last_hw_tucs = (tucs & 0xffff);
+
/* Fill in the context descriptor. */
t = (struct livengood_tcpip_ctxdesc *)
&txq->txq_descs[txq->txq_next];
@@ -7552,9 +7598,6 @@ wm_tx_offload(struct wm_softc *sc, struc
txq->txq_next = WM_NEXTTX(txq, txq->txq_next);
txs->txs_ndesc++;
- *cmdp = cmd;
- *fieldsp = fields;
-
return 0;
}
@@ -7839,6 +7882,8 @@ retry:
continue;
}
} else {
+ txq->txq_last_hw_cmd = txq->last_hw_fields = 0;
+ txq->txq_last_hw_ipcs = txq->last_hw_tucs = 0;
cksumcmd = 0;
cksumfields = 0;
}