hashing bits of packet headers to tie connections to particular
physical interfaces within a trunk turns out to be fairly expensive.
in my very unscientific testing it accounts for about 20% of the
cost of the udp traffic generated with tcpbench -u.
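
(to reproduce something like that: tcpbench -s -u on the receiver
and tcpbench -u <host> on the sender, with the traffic going over
the trunk. -s is server mode, -u switches both ends to udp.)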

we could tune or change the hash. eg, going from siphash-2-4 to
siphash-1-1 halves the overhead of hashing (see the sketch below).
however, it occurred to me that sometimes we already know which
connection a packet belongs to. why not reuse that info when it is
available?
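
for concreteness, here is roughly what a siphash-1-1 helper would
look like with the generic interface in sys/crypto/siphash.h (i'm
assuming the SipHash_Update/SipHash_End signatures there; the
SipHash24_* names are just wrappers that pass rc=2, rf=4):

#include <sys/types.h>
#include <crypto/siphash.h>

u_int64_t
siphash11(const SIPHASH_KEY *key, const void *buf, size_t len)
{
	SIPHASH_CTX ctx;

	/* 1 compression round per word, 1 finalisation round */
	SipHash_Init(&ctx, key);
	SipHash_Update(&ctx, 1, 1, buf, len);
	return (SipHash_End(&ctx, 1, 1));
}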

this lets pf embed the state id into the mbuf as a "flow id" so
other subsystems can use it. eg, trunk can pull it out instead of
hashing the headers itself.

this diff steals the pad field in mbuf packet headers and uses it
to embed a flow id. it makes pf fill the field in and trunk consume
it, which avoids the cost of hashing in trunk altogether.
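
the contract for the new field is deliberately tiny: a producer ors
M_FLOWID_VALID into 15 bits of flow identity, and a consumer checks
the valid bit before trusting the value. roughly:

/* producer: tag the packet with some per-flow identity */
m->m_pkthdr.flowid = M_FLOWID_VALID | (id & M_FLOWID_MASK);

/* consumer: only use the id if something filled it in */
if (m->m_pkthdr.flowid & M_FLOWID_VALID)
	port = m->m_pkthdr.flowid & M_FLOWID_MASK;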

it could be used in other places too, eg, picking an upstream when
we're doing multipath routing. a hand-wavy sketch of that is below.
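
something like this, with the caveat that mpath_select() and
mpath_hash_headers() are made up for illustration, not existing
code:

/*
 * pick one of n equal-cost paths for a packet, preferring the
 * embedded flow id over hashing the headers.
 */
static unsigned int
mpath_select(struct mbuf *m, unsigned int npaths)
{
	if (m->m_pkthdr.flowid & M_FLOWID_VALID)
		return ((m->m_pkthdr.flowid & M_FLOWID_MASK) % npaths);

	return (mpath_hash_headers(m) % npaths);
}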

thoughts?

Index: sys/mbuf.h
===================================================================
RCS file: /cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.196
diff -u -p -r1.196 mbuf.h
--- sys/mbuf.h  14 Aug 2015 05:25:29 -0000      1.196
+++ sys/mbuf.h  18 Sep 2015 05:47:30 -0000
@@ -125,7 +125,7 @@ struct      pkthdr {
        SLIST_HEAD(packet_tags, m_tag) tags;    /* list of packet tags */
        int                      len;           /* total packet length */
        u_int16_t                tagsset;       /* mtags attached */
-       u_int16_t                pad;
+       u_int16_t                flowid;        /* pseudo unique flow id */
        u_int16_t                csum_flags;    /* checksum flags */
        u_int16_t                ether_vtag;    /* Ethernet 802.1p+Q vlan tag */
        u_int                    ph_rtableid;   /* routing table id */
@@ -236,6 +236,10 @@ struct mbuf {
 #define        MT_FTABLE       5       /* fragment reassembly header */
 #define        MT_CONTROL      6       /* extra-data protocol message */
 #define        MT_OOBDATA      7       /* expedited data  */
+
+/* flowid field */
+#define M_FLOWID_VALID 0x8000  /* is the flowid set */
+#define M_FLOWID_MASK  0x7fff  /* flow id to map to path */
 
 /* flags to m_get/MGET */
 #define        M_DONTWAIT      M_NOWAIT
Index: net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.944
diff -u -p -r1.944 pf.c
--- net/pf.c    13 Sep 2015 17:53:44 -0000      1.944
+++ net/pf.c    18 Sep 2015 05:47:30 -0000
@@ -6531,11 +6531,15 @@ done:
                pd.m->m_pkthdr.pf.qid = qid;
        if (pd.dir == PF_IN && s && s->key[PF_SK_STACK])
                pd.m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK];
-       if (pd.dir == PF_OUT &&
-           pd.m->m_pkthdr.pf.inp && !pd.m->m_pkthdr.pf.inp->inp_pf_sk &&
-           s && s->key[PF_SK_STACK] && !s->key[PF_SK_STACK]->inp) {
-               pd.m->m_pkthdr.pf.inp->inp_pf_sk = s->key[PF_SK_STACK];
-               s->key[PF_SK_STACK]->inp = pd.m->m_pkthdr.pf.inp;
+       if (pd.dir == PF_OUT && s) {
+               if (pd.m->m_pkthdr.pf.inp &&
+                   !pd.m->m_pkthdr.pf.inp->inp_pf_sk &&
+                   s->key[PF_SK_STACK] && !s->key[PF_SK_STACK]->inp) {
+                       pd.m->m_pkthdr.pf.inp->inp_pf_sk = s->key[PF_SK_STACK];
+                       s->key[PF_SK_STACK]->inp = pd.m->m_pkthdr.pf.inp;
+               }
+               pd.m->m_pkthdr.flowid = M_FLOWID_VALID |
+                   (M_FLOWID_MASK & bemtoh64(&s->id));
        }
 
        /*
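
(the bemtoh64() above is because, if i'm reading pf right, state ids
come from a counter and are stored big-endian, ie, something like
s->id = htobe64(pf_status.stateid++). swapping back to host order
means the low 15 bits are the fast-moving ones, so consecutive
states spread across paths: stateid 1 maps to flowid 0x8001, stateid
2 to 0x8002, and so on, with flowids repeating after 32k states.)
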
Index: net/if_trunk.c
===================================================================
RCS file: /cvs/src/sys/net/if_trunk.c,v
retrieving revision 1.111
diff -u -p -r1.111 if_trunk.c
--- net/if_trunk.c      10 Sep 2015 16:41:30 -0000      1.111
+++ net/if_trunk.c      18 Sep 2015 05:47:30 -0000
@@ -985,6 +990,9 @@ trunk_hashmbuf(struct mbuf *m, SIPHASH_K
 #endif
        SIPHASH_CTX ctx;
 
+       if (m->m_pkthdr.flowid & M_FLOWID_VALID)
+               return (m->m_pkthdr.flowid & M_FLOWID_MASK);
+
        SipHash24_Init(&ctx, key);
        off = sizeof(*eh);
        if (m->m_len < off)
