classifier changed to an object pointer instead of being embedded in struct
ifnet, to reduce compile dependencies.
There was also an issue with including ip6.h in this file, but that could
probably be fixed with a struct ip6_hdr forward declaration and including in6.h instead.

Signed-off-by: Charles Myers <charles.my...@spirent.com>
---
 bsd/sys/kern/sys_socket.cc    |   1 +
 bsd/sys/kern/uipc_sockbuf.cc  |   1 +
 bsd/sys/kern/uipc_socket.cc   |   1 +
 bsd/sys/net/if.cc             |  15 ++
 bsd/sys/net/if_var.h          |  14 +-
 bsd/sys/net/routecache.hh     |   1 +
 bsd/sys/netinet/tcp_input.cc  | 509 ++++++++++++++++++++++++++++++++++++------
 bsd/sys/netinet/tcp_usrreq.cc |  62 +++--
 bsd/sys/sys/socketvar.h       |   3 +-
 core/net_channel.cc           | 183 +++++++++++++--
 drivers/virtio-net.cc         |   2 +-
 drivers/vmxnet3.cc            |   2 +-
 include/osv/net_channel.hh    |  77 ++++++-
 13 files changed, 731 insertions(+), 140 deletions(-)

diff --git a/bsd/sys/kern/sys_socket.cc b/bsd/sys/kern/sys_socket.cc
index 1245b71..52a6a84 100644
--- a/bsd/sys/kern/sys_socket.cc
+++ b/bsd/sys/kern/sys_socket.cc
@@ -39,6 +39,7 @@
 #include <osv/socket.hh>
 #include <osv/initialize.hh>
 #include <osv/poll.h>
+#include <osv/net_channel.hh>
 
 #include <bsd/sys/sys/libkern.h>
 #include <bsd/sys/sys/param.h>
diff --git a/bsd/sys/kern/uipc_sockbuf.cc b/bsd/sys/kern/uipc_sockbuf.cc
index 7895dd9..106e233 100644
--- a/bsd/sys/kern/uipc_sockbuf.cc
+++ b/bsd/sys/kern/uipc_sockbuf.cc
@@ -34,6 +34,7 @@
 #include <osv/poll.h>
 #include <osv/clock.hh>
 #include <osv/signal.hh>
+#include <osv/net_channel.hh>
 
 #include <bsd/porting/netport.h>
 #include <bsd/porting/rwlock.h>
diff --git a/bsd/sys/kern/uipc_socket.cc b/bsd/sys/kern/uipc_socket.cc
index 9f27cfc..7c7c466 100644
--- a/bsd/sys/kern/uipc_socket.cc
+++ b/bsd/sys/kern/uipc_socket.cc
@@ -127,6 +127,7 @@
 #include <bsd/sys/net/vnet.h>
 
 #include <osv/zcopy.hh>
+#include <osv/net_channel.hh>
 
 #define uipc_d(...) tprintf_d("uipc_socket", __VA_ARGS__)
 
diff --git a/bsd/sys/net/if.cc b/bsd/sys/net/if.cc
index 2f2e4cb..a40d2dd 100644
--- a/bsd/sys/net/if.cc
+++ b/bsd/sys/net/if.cc
@@ -74,6 +74,8 @@
 #include <bsd/sys/netinet/if_ether.h>
 #endif
 
+#include <osv/net_channel.hh>
+
 struct ifindex_entry {
        struct  ifnet *ife_ifnet;
 };
@@ -366,9 +368,12 @@ if_alloc(u_char type)
        // bsd defines a variable named 'ifnet', so we must do this ugliness
        typedef struct ifnet s_ifnet;
        std::unique_ptr<s_ifnet> ifp;
+       std::unique_ptr<classifier> if_classifier;
        u_short idx;
 
        ifp.reset(new s_ifnet{});
+       if_classifier.reset(new classifier());
+       ifp->if_classifier = NULL;
        IFNET_WLOCK();
        if (ifindex_alloc_locked(&idx) != 0) {
                IFNET_WUNLOCK();
@@ -396,6 +401,8 @@ if_alloc(u_char type)
 
        refcount_init(&ifp->if_refcount, 1);    /* Index reference. */
        ifnet_setbyindex(ifp->if_index, ifp.get());
+
+       ifp->if_classifier = if_classifier.release();
        return ifp.release();
 }
 
@@ -407,6 +414,7 @@ if_alloc(u_char type)
 static void
 if_free_internal(struct ifnet *ifp)
 {
+       delete ifp->if_classifier;
 
        KASSERT((ifp->if_flags & IFF_DYING),
            ("if_free_internal: interface not dying"));
@@ -3146,3 +3154,10 @@ if_deregister_com_alloc(u_char type)
        if_com_alloc[type] = NULL;
        if_com_free[type] = NULL;
 }
+
+int
+if_net_channel_input(struct ifnet *ifp, struct mbuf *m)
+{
+       return ifp->if_classifier->post_packet(m);
+}
+
diff --git a/bsd/sys/net/if_var.h b/bsd/sys/net/if_var.h
index a079164..151f764 100644
--- a/bsd/sys/net/if_var.h
+++ b/bsd/sys/net/if_var.h
@@ -33,7 +33,6 @@
 #ifndef        _NET_IF_VAR_H_
 #define        _NET_IF_VAR_H_
 
-#include <osv/net_channel.hh>
 
 /*
  * Structures defining a network interface, providing a packet
@@ -88,8 +87,11 @@ struct       vnet;
 #include <bsd/sys/sys/socket.h>
 #include <bsd/porting/rwlock.h>
 #include <bsd/porting/sync_stub.h>
-#include <osv/net_channel.hh>
 
+/*
+ * Forward class declaration for OSv
+ */
+class classifier;
 
 __BEGIN_DECLS
 
@@ -177,7 +179,8 @@ struct ifnet {
         * get the interface info and statistics including the one gathered by 
HW
         */
        void (*if_getinfo)(struct ifnet *, struct if_data *);
-       classifier if_classifier;
+
+       classifier *if_classifier;
 
        struct  vnet *if_home_vnet;     /* where this ifnet originates from */
        struct  bsd_ifaddr      *if_addr;       /* pointer to link-level 
address */
@@ -214,9 +217,6 @@ struct ifnet {
        char    if_cspare[3];
        int     if_ispare[4];
        void    *if_pspare[8];          /* 1 netmap, 7 TDB */
-
-       void add_net_channel(net_channel* nc, ipv4_tcp_conn_id id) { 
if_classifier.add(id, nc); }
-       void del_net_channel(ipv4_tcp_conn_id id) { if_classifier.remove(id); }
 };
 
 typedef void if_init_f_t(void *);
@@ -771,6 +771,8 @@ typedef     void if_com_free_t(void *com, u_char type);
 void   if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f);
 void   if_deregister_com_alloc(u_char type);
 
+int    if_net_channel_input(struct ifnet *, struct mbuf *);
+
 #define IF_LLADDR(ifp)                                                 \
     LLADDR((struct bsd_sockaddr_dl *)((ifp)->if_addr->ifa_addr))
 
diff --git a/bsd/sys/net/routecache.hh b/bsd/sys/net/routecache.hh
index 0c5c1f4..b6bb861 100644
--- a/bsd/sys/net/routecache.hh
+++ b/bsd/sys/net/routecache.hh
@@ -53,6 +53,7 @@
 #include <bsd/sys/sys/domain.h>
 #include <bsd/sys/net/if.h>
 #include <bsd/sys/net/if_dl.h>
+#include <bsd/sys/netinet/in.h>
 #include <bsd/sys/netinet/in_var.h>
 
 #include <bsd/sys/net/route.h>
diff --git a/bsd/sys/netinet/tcp_input.cc b/bsd/sys/netinet/tcp_input.cc
index c710082..13512e5 100644
--- a/bsd/sys/netinet/tcp_input.cc
+++ b/bsd/sys/netinet/tcp_input.cc
@@ -77,6 +77,15 @@
 #include <bsd/sys/netinet/icmp_var.h>  /* for ICMP_BANDLIM */
 #include <bsd/sys/netinet/ip_var.h>
 #include <bsd/sys/netinet/ip_options.h>
+#ifdef INET6
+#include <bsd/sys/netinet/ip6.h>
+#include <bsd/sys/netinet6/nd6.h>
+#include <bsd/sys/netinet6/tcp6_var.h>
+#include <bsd/sys/netinet6/ip6_var.h>
+#include <bsd/sys/netinet/icmp6.h>
+#include <bsd/sys/netinet6/in6_pcb.h>
+#include <bsd/sys/netinet6/scope6_var.h>
+#endif
 #include <bsd/sys/netinet/tcp_fsm.h>
 #include <bsd/sys/netinet/tcp_seq.h>
 #include <bsd/sys/netinet/tcp_timer.h>
@@ -91,6 +100,7 @@
 #include <osv/poll.h>
 #include <osv/net_trace.hh>
 #include <osv/aligned_new.hh>
+#include <osv/net_channel.hh>
 
 TRACEPOINT(trace_tcp_input_ack, "%p: We've got ACK: %u", void*, unsigned int);
 
@@ -260,6 +270,9 @@ cc_conn_init(struct tcpcb *tp)
        struct hc_metrics_lite metrics;
        struct inpcb *inp = tp->t_inpcb;
        int rtt;
+#ifdef INET6
+       int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
+#endif
 
        INP_LOCK_ASSERT(tp->t_inpcb);
 
@@ -322,8 +335,18 @@ cc_conn_init(struct tcpcb *tp)
        if (V_tcp_do_rfc3390)
                tp->snd_cwnd = bsd_min(4 * tp->t_maxseg,
                    bsd_max(2 * tp->t_maxseg, 4380));
+#ifdef INET6
+       else if (isipv6 && in6_localaddr(&inp->in6p_faddr))
+               tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local;
+#endif
+#if defined(INET) && defined(INET6)
+       else if (!isipv6 && in_localaddr(inp->inp_faddr))
+               tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local;
+#endif
+#ifdef INET
        else if (in_localaddr(inp->inp_faddr))
                tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local;
+#endif
        else
                tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz;
 
@@ -409,9 +432,6 @@ tcp_fields_to_host(struct tcphdr *th)
 }
 
 
-/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
-#define ND6_HINT(tp)
-
 /*
  * Indicate whether this ack should be delayed.  We can delay the ack if
  *     - there is no delayed ack timer in progress and
@@ -426,24 +446,73 @@ tcp_fields_to_host(struct tcphdr *th)
            (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
 
 
+/*
+ * TCP input handling is split into multiple parts:
+ *   tcp6_input is a thin wrapper around tcp_input for the extended
+ *     ip6_protox[] call format in ip6_input
+ *   tcp_input handles primary segment validation, inpcb lookup and
+ *     SYN processing on listen sockets
+ *   tcp_do_segment processes the ACK and text of the segment for
+ *     establishing, established and closing connections
+ */
+#ifdef INET6
+int
+tcp6_input(struct mbuf **mp, int *offp, int proto)
+{
+       struct mbuf *m = *mp;
+       struct in6_ifaddr *ia6;
+
+       IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), IPPROTO_DONE);
+
+       /*
+        * draft-itojun-ipv6-tcp-to-anycast
+        * better place to put this in?
+        */
+       ia6 = ip6_getdstifaddr(m);
+       if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) {
+               struct ip6_hdr *ip6;
+
+               ifa_free(&ia6->ia_ifa);
+               ip6 = mtod(m, struct ip6_hdr *);
+               icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR,
+                           (caddr_t)&ip6->ip6_dst - (caddr_t)ip6);
+               return IPPROTO_DONE;
+       }
+       if (ia6)
+               ifa_free(&ia6->ia_ifa);
+
+       tcp_input(m, *offp);
+       return IPPROTO_DONE;
+}
+#endif /* INET6 */
+
 void
 tcp_input(struct mbuf *m, int off0)
 {
        struct tcphdr *th = NULL;
        struct ip *ip = NULL;
+#ifdef INET
        struct ipovly *ipov;
+#endif
        struct inpcb *inp = NULL;
        struct tcpcb *tp = NULL;
        struct socket *so = NULL;
        u_char *optp = NULL;
        int optlen = 0;
+#ifdef INET
        int len;
+#endif
        int tlen = 0, off;
        int drop_hdrlen;
        int thflags;
        int rstreason = 0;      /* For badport_bandlim accounting purposes */
        uint8_t iptos = 0;
+#ifdef INET6
+       struct ip6_hdr *ip6 = NULL;
+       int isipv6;
+#else
        const void *ip6 = NULL;
+#endif /* INET6 */
        struct tcpopt to;               /* options in this segment */
        char *s = NULL;                 /* address and port logging */
        int ti_locked;
@@ -460,10 +529,63 @@ tcp_input(struct mbuf *m, int off0)
        short ostate = 0;
 #endif
 
+#ifdef INET6
+       isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
+#endif
 
        to.to_flags = 0;
        TCPSTAT_INC(tcps_rcvtotal);
 
+#ifdef INET6
+       if (isipv6) {
+               /* IP6_EXTHDR_CHECK() is already done at tcp6_input(). */
+
+               if (m->m_hdr.mh_len < (sizeof(*ip6) + sizeof(*th))) {
+                       m = m_pullup(m, sizeof(*ip6) + sizeof(*th));
+                       if (m == NULL) {
+                               TCPSTAT_INC(tcps_rcvshort);
+                               return;
+                       }
+               }
+
+               ip6 = mtod(m, struct ip6_hdr *);
+               th = (struct tcphdr *)((caddr_t)ip6 + off0);
+               tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
+
+               if (m->M_dat.MH.MH_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
+                       if (m->M_dat.MH.MH_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
+                               th->th_sum = m->M_dat.MH.MH_pkthdr.csum_data;
+                       else
+                               th->th_sum = in6_cksum_pseudo(ip6, tlen,
+                                   IPPROTO_TCP, 
m->M_dat.MH.MH_pkthdr.csum_data);
+                       th->th_sum ^= 0xffff;
+               } else
+                       th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen);
+               if (th->th_sum) {
+                       TCPSTAT_INC(tcps_rcvbadsum);
+                       goto drop;
+               }
+
+               /*
+                * Be proactive about unspecified IPv6 address in source.
+                * As we use all-zero to indicate unbounded/unconnected pcb,
+                * unspecified IPv6 address can be used to confuse us.
+                *
+                * Note that packets with unspecified IPv6 destination is
+                * already dropped in ip6_input.
+                */
+               if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
+                       /* XXX stat */
+                       goto drop;
+               }
+       }
+#endif
+#if defined(INET) && defined(INET6)
+       else
+#endif
+#ifdef INET
+       {
+
        /*
         * Get IP and TCP header together in first mbuf.
         * Note: IP leaves IP header in first mbuf.
@@ -514,8 +636,19 @@ tcp_input(struct mbuf *m, int off0)
        }
        /* Re-initialization for later version check */
        ip->ip_v = IPVERSION;
+       }
+#endif /* INET */
 
-       iptos = ip->ip_tos;
+#ifdef INET6
+       if (isipv6)
+               iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+#endif
+#if defined(INET) && defined(INET6)
+       else
+#endif
+#ifdef INET
+               iptos = ip->ip_tos;
+#endif
 
        /*
         * Check that TCP offset makes sense,
@@ -528,16 +661,30 @@ tcp_input(struct mbuf *m, int off0)
        }
        tlen -= off;    /* tlen is used instead of ti->ti_len */
        if (off > sizeof (struct tcphdr)) {
-               if (m->m_hdr.mh_len < sizeof(struct ip) + off) {
-                       if ((m = m_pullup(m, sizeof (struct ip) + off))
-                           == NULL) {
-                               TCPSTAT_INC(tcps_rcvshort);
-                               return;
+#ifdef INET6
+               if (isipv6) {
+                       IP6_EXTHDR_CHECK(m, off0, off, );
+                       ip6 = mtod(m, struct ip6_hdr *);
+                       th = (struct tcphdr *)((caddr_t)ip6 + off0);
+               }
+#endif
+#if defined(INET) && defined(INET6)
+               else
+#endif
+#ifdef INET
+               {
+                       if (m->m_hdr.mh_len < sizeof(struct ip) + off) {
+                               if ((m = m_pullup(m, sizeof (struct ip) + off))
+                                   == NULL) {
+                                       TCPSTAT_INC(tcps_rcvshort);
+                                       return;
+                               }
+                               ip = mtod(m, struct ip *);
+                               ipov = (struct ipovly *)ip;
+                               th = (struct tcphdr *)((caddr_t)ip + off0);
                        }
-                       ip = mtod(m, struct ip *);
-                       ipov = (struct ipovly *)ip;
-                       th = (struct tcphdr *)((caddr_t)ip + off0);
                }
+#endif
                optlen = off - sizeof (struct tcphdr);
                optp = (u_char *)(th + 1);
        }
@@ -575,10 +722,21 @@ findpcb:
        }
 #endif
 
-       inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src,
-           th->th_sport, ip->ip_dst, th->th_dport,
-           INPLOOKUP_WILDCARD | INPLOOKUP_LOCKPCB,
-           m->M_dat.MH.MH_pkthdr.rcvif, m);
+#ifdef INET6
+       if (isipv6) {
+               inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src,
+                       th->th_sport, &ip6->ip6_dst, th->th_dport,
+                       INPLOOKUP_WILDCARD | INPLOOKUP_LOCKPCB,
+                       m->M_dat.MH.MH_pkthdr.rcvif, m);
+       }
+       else
+#endif
+       {
+               inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src,
+                       th->th_sport, ip->ip_dst, th->th_dport,
+                       INPLOOKUP_WILDCARD | INPLOOKUP_LOCKPCB,
+                       m->M_dat.MH.MH_pkthdr.rcvif, m);
+       }
 
        /*
         * If the INPCB does not exist then all data in the incoming
@@ -617,10 +775,29 @@ findpcb:
                inp->inp_flowid = m->M_dat.MH.MH_pkthdr.flowid;
        }
 
+#ifdef IPSEC
+#ifdef INET6
+       if (isipv6 && ipsec6_in_reject(m, inp)) {
+               V_ipsec6stat.in_polvio++;
+               goto dropunlock;
+       } else
+#endif /* INET6 */
+       if (ipsec4_in_reject(m, inp) != 0) {
+               V_ipsec4stat.in_polvio++;
+               goto dropunlock;
+       }
+#endif /* IPSEC */
+
+
        /*
         * Check the minimum TTL for socket.
         */
        if (inp->inp_ip_minttl != 0) {
+#ifdef INET6
+               if (isipv6) {   /* ip stays NULL for IPv6: keep off ip->ip_ttl */
+                       if (inp->inp_ip_minttl > ip6->ip6_hlim) goto dropunlock;
+               } else
+#endif
                if (inp->inp_ip_minttl > ip->ip_ttl)
                        goto dropunlock;
        }
@@ -669,6 +846,7 @@ relocked:
                INP_INFO_WUNLOCK(&V_tcbinfo);
                return;
        }
+
        /*
         * The TCPCB may no longer exist if the connection is winding
         * down or it is in the CLOSED state.  Either way we drop the
@@ -719,6 +897,11 @@ relocked:
 #ifdef TCPDEBUG
        if (so->so_options & SO_DEBUG) {
                ostate = tp->get_state();
+#ifdef INET6
+               if (isipv6) {
+                       bcopy((char *)ip6, (char *)tcp_saveipgen, sizeof(*ip6));
+               } else
+#endif
                        bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip));
                tcp_savetcp = *th;
        }
@@ -738,6 +921,13 @@ relocked:
                INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
 
                bzero(&inc, sizeof(inc));
+#ifdef INET6
+               if (isipv6) {
+                       inc.inc_flags |= INC_ISIPV6;
+                       inc.inc6_faddr = ip6->ip6_src;
+                       inc.inc6_laddr = ip6->ip6_dst;
+               } else
+#endif
                {
                        inc.inc_faddr = ip->ip_src;
                        inc.inc_laddr = ip->ip_dst;
@@ -891,6 +1081,56 @@ relocked:
                    ("%s: Listen socket: TH_RST or TH_ACK set", __func__));
                KASSERT(thflags & (TH_SYN),
                    ("%s: Listen socket: TH_SYN not set", __func__));
+#ifdef INET6
+               /*
+                * If deprecated address is forbidden,
+                * we do not accept SYN to deprecated interface
+                * address to prevent any new inbound connection from
+                * getting established.
+                * When we do not accept SYN, we send a TCP RST,
+                * with deprecated source address (instead of dropping
+                * it).  We compromise it as it is much better for peer
+                * to send a RST, and RST will be the final packet
+                * for the exchange.
+                *
+                * If we do not forbid deprecated addresses, we accept
+                * the SYN packet.  RFC2462 does not suggest dropping
+                * SYN in this case.
+                * If we decipher RFC2462 5.5.4, it says like this:
+                * 1. use of deprecated addr with existing
+                *    communication is okay - "SHOULD continue to be
+                *    used"
+                * 2. use of it with new communication:
+                *   (2a) "SHOULD NOT be used if alternate address
+                *        with sufficient scope is available"
+                *   (2b) nothing mentioned otherwise.
+                * Here we fall into (2b) case as we have no choice in
+                * our source address selection - we must obey the peer.
+                *
+                * The wording in RFC2462 is confusing, and there are
+                * multiple description text for deprecated address
+                * handling - worse, they are not exactly the same.
+                * I believe 5.5.4 is the best one, so we follow 5.5.4.
+                */
+               if (isipv6 && !V_ip6_use_deprecated) {
+                       struct in6_ifaddr *ia6;
+
+                       ia6 = ip6_getdstifaddr(m);
+                       if (ia6 != NULL &&
+                           (ia6->ia6_flags & IN6_IFF_DEPRECATED)) {
+                               ifa_free(&ia6->ia_ifa);
+                               if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+                                   bsd_log(LOG_DEBUG, "%s; %s: Listen socket: "
+                                       "Connection attempt to deprecated "
+                                       "IPv6 address rejected\n",
+                                       s, __func__);
+                               rstreason = BANDLIM_RST_OPENPORT;
+                               goto dropwithreset;
+                       }
+                       if (ia6)
+                               ifa_free(&ia6->ia_ifa);
+               }
+#endif /* INET6 */
                /*
                 * Basic sanity checks on incoming SYN requests:
                 *   Don't respond if the destination is a link layer
@@ -909,25 +1149,52 @@ relocked:
                                "link layer address ignored\n", s, __func__);
                        goto dropunlock;
                }
-               if (th->th_dport == th->th_sport &&
-                   ip->ip_dst.s_addr == ip->ip_src.s_addr) {
-                       if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
-                           bsd_log(LOG_DEBUG, "%s; %s: Listen socket: "
-                               "Connection attempt from/to self "
-                               "ignored\n", s, __func__);
-                       goto dropunlock;
+#ifdef INET6
+               if (isipv6) {
+                       if (th->th_dport == th->th_sport &&
+                           IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) {
+                               if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+                                   bsd_log(LOG_DEBUG, "%s; %s: Listen socket: "
+                                       "Connection attempt to/from self "
+                                       "ignored\n", s, __func__);
+                               goto dropunlock;
+                       }
+                       if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
+                           IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
+                               if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+                                   bsd_log(LOG_DEBUG, "%s; %s: Listen socket: "
+                                       "Connection attempt from/to multicast "
+                                       "address ignored\n", s, __func__);
+                               goto dropunlock;
+                       }
                }
-               if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
-                   IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
-                   ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
-                   in_broadcast(ip->ip_dst, m->M_dat.MH.MH_pkthdr.rcvif)) {
-                       if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
-                           bsd_log(LOG_DEBUG, "%s; %s: Listen socket: "
-                               "Connection attempt from/to broad- "
-                               "or multicast address ignored\n",
-                               s, __func__);
-                       goto dropunlock;
+#endif
+#if defined(INET) && defined(INET6)
+               else
+#endif
+#ifdef INET
+               {
+                       if (th->th_dport == th->th_sport &&
+                           ip->ip_dst.s_addr == ip->ip_src.s_addr) {
+                               if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+                                   bsd_log(LOG_DEBUG, "%s; %s: Listen socket: "
+                                       "Connection attempt from/to self "
+                                       "ignored\n", s, __func__);
+                               goto dropunlock;
+                       }
+                       if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+                           IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
+                           ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
+                           in_broadcast(ip->ip_dst, 
m->M_dat.MH.MH_pkthdr.rcvif)) {
+                               if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+                                   bsd_log(LOG_DEBUG, "%s; %s: Listen socket: "
+                                       "Connection attempt from/to broad- "
+                                       "or multicast address ignored\n",
+                                       s, __func__);
+                               goto dropunlock;
+                       }
                }
+#endif
                /*
                 * SYN appears to be valid.  Create compressed TCP state
                 * for syncache.
@@ -945,9 +1212,17 @@ relocked:
                 */
                INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
                return;
+       } else if (tp->get_state()== TCPS_LISTEN) {
+               /*
+                * When a listen socket is torn down the SO_ACCEPTCONN
+                * flag is removed first while connections are drained
+                * from the accept queue in a unlock/lock cycle of the
+                * ACCEPT_LOCK, opening a race condition allowing a SYN
+                * attempt to go through unhandled.
+                */
+               goto dropunlock;
        }
 
-
        /*
         * Segment belongs to a connection in SYN_SENT, ESTABLISHED or later
         * state.  tcp_do_segment() always consumes the mbuf chain and unlocks 
pcbinfo.
@@ -1049,7 +1324,9 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct 
socket *so,
         * if we fail, drop the packet.  FIXME: invert the lock order so we 
don't
         * have to drop packets.
         */
-       if (tp->get_state() != TCPS_ESTABLISHED && ti_locked == TI_UNLOCKED) {
+       if ((tp->get_state() != TCPS_ESTABLISHED
+       || (thflags & (TH_SYN | TH_FIN | TH_RST)) != 0)
+          && ti_locked == TI_UNLOCKED) {
                if (INP_INFO_TRY_WLOCK(&V_tcbinfo)) {
                        ti_locked = TI_WLOCKED;
                } else {
@@ -1188,7 +1465,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct 
socket *so,
            th->th_seq == tp->rcv_nxt &&
            (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
            tp->snd_nxt == tp->snd_max &&
-           tiwin && tiwin == tp->snd_wnd && 
+           tiwin && tiwin == tp->snd_wnd &&
            ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
            LIST_EMPTY(&tp->t_segq) &&
            ((to.to_flags & TOF_TS) == 0 ||
@@ -1263,7 +1540,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct 
socket *so,
                                if (tp->snd_una > tp->snd_recover &&
                                    th->th_ack <= tp->snd_recover)
                                        tp->snd_recover = th->th_ack - 1;
-                               
+
                                /*
                                 * Let the congestion control algorithm update
                                 * congestion control related information. This
@@ -1511,7 +1788,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct 
socket *so,
                                tp->t_flags |= TF_ECN_PERMIT;
                                TCPSTAT_INC(tcps_ecn_shs);
                        }
-                       
+
                        /*
                         * Received <SYN,ACK> in SYN_SENT[*] state.
                         * Transitions:
@@ -1848,14 +2125,14 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, 
struct socket *so,
        /*
         * If last ACK falls within this segment's sequence numbers,
         * record its timestamp.
-        * NOTE: 
+        * NOTE:
         * 1) That the test incorporates suggestions from the latest
         *    proposal of the tc...@cray.com list (Braden 1993/04/26).
         * 2) That updating only on newer timestamps interferes with
         *    our earlier PAWS tests, so this check should be solely
         *    predicated on the sequence space of this segment.
-        * 3) That we modify the segment boundary check to be 
-        *        Last.ACK.Sent <= SEG.SEQ + SEG.Len  
+        * 3) That we modify the segment boundary check to be
+        *        Last.ACK.Sent <= SEG.SEQ + SEG.Len
         *    instead of RFC1323's
         *        Last.ACK.Sent < SEG.SEQ + SEG.Len,
         *    This modified check allows us to overcome RFC1323's
@@ -1971,13 +2248,15 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, 
struct socket *so,
                        tcp_sack_doack(tp, &to, th->th_ack);
 
                if (th->th_ack <= tp->snd_una) {
-                       if (tlen == 0 && tiwin == tp->snd_wnd) {
+                       if (tlen == 0 && tiwin == tp->snd_wnd &&
+                               !(thflags & TH_FIN)) {
                                TCPSTAT_INC(tcps_rcvdupack);
                                /*
                                 * If we have outstanding data (other than
                                 * a window probe), this is a completely
                                 * duplicate ack (ie, window info didn't
-                                * change), the ack is the biggest we've
+                                * change and FIN isn't set),
+                                * the ack is the biggest we've
                                 * seen and we've seen exactly our rexmt
                                 * threshhold of them, assume a packet
                                 * has been dropped and retransmit it.
@@ -2009,10 +2288,10 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, 
struct socket *so,
                                        if ((tp->t_flags & TF_SACK_PERMIT) &&
                                            IN_FASTRECOVERY(tp->t_flags)) {
                                                int awnd;
-                                               
+
                                                /*
                                                 * Compute the amount of data 
in flight first.
-                                                * We can inject new data into 
the pipe iff 
+                                                * We can inject new data into 
the pipe iff
                                                 * we have less than 1/2 the 
original window's
                                                 * worth of data in flight.
                                                 */
@@ -2616,7 +2895,12 @@ static void
 tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
     int tlen, int rstreason)
 {
+#ifdef INET
        struct ip *ip;
+#endif
+#ifdef INET6
+       struct ip6_hdr *ip6;
+#endif
 
        if (tp != NULL) {
                INP_LOCK_ASSERT(tp->t_inpcb);
@@ -2625,6 +2909,19 @@ tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, 
struct tcpcb *tp,
        /* Don't bother if destination was broadcast/multicast. */
        if ((th->th_flags & TH_RST) || m->m_hdr.mh_flags & (M_BCAST|M_MCAST))
                goto drop;
+#ifdef INET6
+       if (mtod(m, struct ip *)->ip_v == 6) {
+               ip6 = mtod(m, struct ip6_hdr *);
+               if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
+                   IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
+                       goto drop;
+               /* IPv6 anycast check is done at tcp6_input() */
+       }
+#endif
+#if defined(INET) && defined(INET6)
+       else
+#endif
+#ifdef INET
        {
                ip = mtod(m, struct ip *);
                if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
@@ -2633,6 +2930,7 @@ tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, 
struct tcpcb *tp,
                    in_broadcast(ip->ip_dst, m->M_dat.MH.MH_pkthdr.rcvif))
                        goto drop;
        }
+#endif
 
        /* Perform bandwidth limiting. */
        if (badport_bandlim(rstreason) < 0)
@@ -3054,7 +3352,7 @@ tcp_mss(struct tcpcb *tp, int offer)
        int mtuflags = 0;
 
        KASSERT(tp != NULL, ("%s: tp == NULL", __func__));
-       
+
        tcp_mss_update(tp, offer, -1, &metrics, &mtuflags);
 
        mss = tp->t_maxseg;
@@ -3188,8 +3486,17 @@ tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
 
 // INP_LOCK held
 static void
-tcp_net_channel_packet(tcpcb* tp, mbuf* m)
+tcp_net_channel_ipv4_packet(tcpcb* tp, mbuf* m)
 {
+       if (tp->get_state() <= TCPS_LISTEN) {
+               // We can't hand this packet off to tcp_do_segment due to the
+               // current connection state.    Drop the channel and handle the
+               // packet via the slow path.
+               tcp_teardown_net_channel(tp);
+               netisr_dispatch(NETISR_ETHER, m);
+               return;
+       }
+
        log_packet_handling(m, NETISR_ETHER);
        caddr_t start = m->m_hdr.mh_data;
        auto h = start;
@@ -3207,6 +3514,7 @@ tcp_net_channel_packet(tcpcb* tp, mbuf* m)
        auto tlen = ip_len - (ip_size + (th->th_off << 2));
        auto iptos = ip_hdr->ip_tos;
        SOCK_LOCK_ASSERT(so);
+
        bool want_close;
        m_trim(m, ETHER_HDR_LEN + ip_len);
        tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, TI_UNLOCKED, want_close);
@@ -3214,26 +3522,91 @@ tcp_net_channel_packet(tcpcb* tp, mbuf* m)
        assert(!want_close);
 }
 
-static ipv4_tcp_conn_id tcp_connection_id(tcpcb* tp)
+static void tcp_ipv4_connection_id(const tcpcb* tp, ipv4_tcp_conn_id* id)
 {
        auto& conn = tp->t_inpcb->inp_inc.inc_ie;
-       return {
-               conn.ie_dependfaddr.ie46_foreign.ia46_addr4,
-               conn.ie_dependladdr.ie46_local.ia46_addr4,
-               ntohs(conn.ie_fport),
-               ntohs(conn.ie_lport)
-       };
+       id->src_addr = conn.ie_dependfaddr.ie46_foreign.ia46_addr4;
+       id->dst_addr = conn.ie_dependladdr.ie46_local.ia46_addr4;
+       id->src_port = ntohs(conn.ie_fport);
+       id->dst_port = ntohs(conn.ie_lport);
 }
 
+#ifdef INET6
+
+// INP_LOCK held
+static void
+tcp_net_channel_ipv6_packet(tcpcb* tp, mbuf* m)
+{
+       if (tp->get_state() <= TCPS_LISTEN) {
+               // We can't hand this packet off to tcp_do_segment due to the
+               // current connection state.    Drop the channel and handle the
+               // packet via the slow path.
+               tcp_teardown_net_channel(tp);
+               netisr_dispatch(NETISR_ETHER, m);
+               return;
+       }
+
+       log_packet_handling(m, NETISR_ETHER);
+       caddr_t start = m->m_hdr.mh_data;
+       auto h = start;
+       h += ETHER_HDR_LEN;
+       auto ip_hdr = reinterpret_cast<ip6_hdr*>(h);
+       int ip_off = ETHER_HDR_LEN;
+       int nxt;
+       int nxt_off = ip6_lasthdr(m, ip_off, IPPROTO_IPV6, &nxt);
+       h = start + nxt_off;
+       auto th = reinterpret_cast<tcphdr*>(h);
+       h += th->th_off << 2;
+       auto drop_hdrlen = h - start;
+       tcp_fields_to_host(th);
+       trace_tcp_input_ack(tp, th->th_ack.raw());
+       auto so = tp->t_inpcb->inp_socket;
+       auto ip_len = sizeof(*ip_hdr) + ntohs(ip_hdr->ip6_plen);
+       auto tlen = ip_len - ((nxt_off - ip_off) +  (th->th_off << 2));
+       auto iptos = (ntohl(ip_hdr->ip6_flow) >> 20) & 0xff;
+       SOCK_LOCK_ASSERT(so);
+
+       bool want_close;
+       m_trim(m, ETHER_HDR_LEN + ip_len);
+       tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, TI_UNLOCKED, want_close);
+       // since a socket is still attached, we should not be closing
+       assert(!want_close);
+}
+
+static void tcp_ipv6_connection_id(const tcpcb* tp, ipv6_tcp_conn_id* id)
+{
+       auto& conn = tp->t_inpcb->inp_inc.inc_ie;
+       id->src_addr = conn.ie_dependfaddr.ie6_foreign;
+       in6_clearscope(&id->src_addr);
+       id->dst_addr = conn.ie_dependladdr.ie6_local;
+       in6_clearscope(&id->dst_addr);
+       id->src_port = ntohs(conn.ie_fport);
+       id->dst_port = ntohs(conn.ie_lport);
+}
+
+#endif /* INET6 */
+
 void
 tcp_setup_net_channel(tcpcb* tp, struct ifnet* intf)
 {
-       auto nc = aligned_new<net_channel>([=] (mbuf *m) { 
tcp_net_channel_packet(tp, m); });
-       tp->nc = nc;
        tp->nc_intf = intf;
-       intf->add_net_channel(nc, tcp_connection_id(tp));
+#ifdef INET6
+       if (tp->t_inpcb->inp_inc.inc_flags & INC_ISIPV6) {
+               ipv6_tcp_conn_id id;
+               tcp_ipv6_connection_id(tp, &id);
+               tp->nc = aligned_new<net_channel>([=] (mbuf *m) { tcp_net_channel_ipv6_packet(tp, m); });
+               intf->if_classifier->add(id, tp->nc);
+       }
+       else
+#endif
+       {
+               ipv4_tcp_conn_id id;
+               tcp_ipv4_connection_id(tp, &id);
+               tp->nc = aligned_new<net_channel>([=] (mbuf *m) { tcp_net_channel_ipv4_packet(tp, m); });
+               intf->if_classifier->add(id, tp->nc);
+       }
        auto so = tp->t_inpcb->inp_socket;
-       so->so_nc = nc;
+       so->so_nc = tp->nc;
        if (so->fp) {
                WITH_LOCK(so->fp->f_lock) {
                        for (auto&& pl : so->fp->f_poll_list) {
@@ -3250,10 +3623,23 @@ tcp_setup_net_channel(tcpcb* tp, struct ifnet* intf)
 
 void tcp_teardown_net_channel(tcpcb *tp)
 {
-       if (!tp->nc_intf) {
+       auto intf = tp->nc_intf;
+       if (!intf) {
                return;
        }
-       tp->nc_intf->del_net_channel(tcp_connection_id(tp));
+#ifdef INET6
+       if (tp->t_inpcb->inp_inc.inc_flags & INC_ISIPV6) {
+               ipv6_tcp_conn_id id;
+               tcp_ipv6_connection_id(tp, &id);
+               intf->if_classifier->remove(id);
+       }
+       else
+#endif
+       {
+               ipv4_tcp_conn_id id;
+               tcp_ipv4_connection_id(tp, &id);
+               intf->if_classifier->remove(id);
+       }
        tp->nc_intf = nullptr;
        // keep tp->nc around since it might still contain packets
 }
@@ -3272,9 +3658,6 @@ tcp_free_net_channel(tcpcb* tp)
                }
                so->so_nc = nullptr;
        }
-       if (tp->nc_intf) {
-               tp->nc_intf->del_net_channel(tcp_connection_id(tp));
-       }
        osv::rcu_dispose(tp->nc);
        tp->nc = nullptr;
 }
diff --git a/bsd/sys/netinet/tcp_usrreq.cc b/bsd/sys/netinet/tcp_usrreq.cc
index f2b80e0..d2c8dda 100644
--- a/bsd/sys/netinet/tcp_usrreq.cc
+++ b/bsd/sys/netinet/tcp_usrreq.cc
@@ -70,6 +70,7 @@
 #ifdef INET6
 #include <bsd/sys/netinet/ip6.h>
 #include <bsd/sys/netinet6/in6_pcb.h>
+#include <bsd/sys/netinet6/in6_var.h>
 #include <bsd/sys/netinet6/ip6_var.h>
 #include <bsd/sys/netinet6/scope6_var.h>
 #endif
@@ -83,6 +84,7 @@
 #endif
 
 #include <osv/poll.h>
+#include <osv/net_channel.hh>
 
 /*
  * TCP protocol interface to socket abstraction.
@@ -296,6 +298,7 @@ tcp6_usr_bind(struct socket *so, struct bsd_sockaddr *nam, 
struct thread *td)
        sin6p = (struct bsd_sockaddr_in6 *)nam;
        if (nam->sa_len != sizeof (*sin6p))
                return (EINVAL);
+
        /*
         * Must check for multicast addresses and disallow binding
         * to them.
@@ -312,6 +315,7 @@ tcp6_usr_bind(struct socket *so, struct bsd_sockaddr *nam, 
struct thread *td)
                error = EINVAL;
                goto out;
        }
+
        tp = intotcpcb(inp);
        TCPDEBUG1();
        INP_HASH_WLOCK(&V_tcbinfo);
@@ -327,14 +331,13 @@ tcp6_usr_bind(struct socket *so, struct bsd_sockaddr 
*nam, struct thread *td)
                        in6_sin6_2_sin(&sin, sin6p);
                        inp->inp_vflag |= INP_IPV4;
                        inp->inp_vflag &= ~INP_IPV6;
-                       error = in_pcbbind(inp, (struct bsd_sockaddr *)&sin,
-                           td->td_ucred);
+                       error = in_pcbbind(inp, (struct bsd_sockaddr *)&sin, 0);
                        INP_HASH_WUNLOCK(&V_tcbinfo);
                        goto out;
                }
        }
 #endif
-       error = in6_pcbbind(inp, nam, td->td_ucred);
+       error = in6_pcbbind(inp, nam, 0);
        INP_HASH_WUNLOCK(&V_tcbinfo);
 out:
        TCPDEBUG2(PRU_BIND);
@@ -408,7 +411,7 @@ tcp6_usr_listen(struct socket *so, int backlog, struct 
thread *td)
                inp->inp_vflag &= ~INP_IPV4;
                if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
                        inp->inp_vflag |= INP_IPV4;
-               error = in6_pcbbind(inp, (struct bsd_sockaddr *)0, 
td->td_ucred);
+               error = in6_pcbbind(inp, (struct bsd_sockaddr *)0, 0);
        }
        INP_HASH_WUNLOCK(&V_tcbinfo);
        if (error == 0) {
@@ -517,23 +520,18 @@ tcp6_usr_connect(struct socket *so, struct bsd_sockaddr 
*nam, struct thread *td)
                in6_sin6_2_sin(&sin, sin6p);
                inp->inp_vflag |= INP_IPV4;
                inp->inp_vflag &= ~INP_IPV6;
-               if ((error = prison_remote_ip4(td->td_ucred,
-                   &sin.sin_addr)) != 0)
-                       goto out;
                if ((error = tcp_connect(tp, (struct bsd_sockaddr *)&sin, td)) 
!= 0)
                        goto out;
-               error = tcp_output_connect(so, nam);
+               error = tcp_output(tp);
                goto out;
        }
 #endif
        inp->inp_vflag &= ~INP_IPV4;
        inp->inp_vflag |= INP_IPV6;
        inp->inp_inc.inc_flags |= INC_ISIPV6;
-       if ((error = prison_remote_ip6(td->td_ucred, &sin6p->sin6_addr)) != 0)
-               goto out;
        if ((error = tcp6_connect(tp, nam, td)) != 0)
                goto out;
-       error = tcp_output_connect(so, nam);
+       error = tcp_output(tp);
 
 out:
        TCPDEBUG2(PRU_CONNECT);
@@ -642,7 +640,6 @@ tcp6_usr_accept(struct socket *so, struct bsd_sockaddr 
**nam)
 
        inp = sotoinpcb(so);
        KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
-       INP_INFO_RLOCK(&V_tcbinfo);
        INP_LOCK(inp);
        if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
                error = ECONNABORTED;
@@ -668,7 +665,6 @@ tcp6_usr_accept(struct socket *so, struct bsd_sockaddr 
**nam)
 out:
        TCPDEBUG2(PRU_ACCEPT);
        INP_UNLOCK(inp);
-       INP_INFO_RUNLOCK(&V_tcbinfo);
        if (error == 0) {
                if (v4)
                        *nam = in6_v4mapsin6_sockaddr(port, &addr);
@@ -1029,25 +1025,25 @@ struct pr_usrreqs tcp_usrreqs = initialize_with([] 
(pr_usrreqs& x) {
 #endif /* INET */
 
 #ifdef INET6
-struct pr_usrreqs tcp6_usrreqs = {
-       .pru_abort =            tcp_usr_abort,
-       .pru_accept =           tcp6_usr_accept,
-       .pru_attach =           tcp_usr_attach,
-       .pru_bind =             tcp6_usr_bind,
-       .pru_connect =          tcp6_usr_connect,
-       .pru_control =          in6_control,
-       .pru_detach =           tcp_usr_detach,
-       .pru_disconnect =       tcp_usr_disconnect,
-       .pru_listen =           tcp6_usr_listen,
-       .pru_peeraddr =         in6_mapped_peeraddr,
-       .pru_rcvd =             tcp_usr_rcvd,
-       .pru_rcvoob =           tcp_usr_rcvoob,
-       .pru_send =             tcp_usr_send,
-       .pru_shutdown =         tcp_usr_shutdown,
-       .pru_sockaddr =         in6_mapped_sockaddr,
-       .pru_sosetlabel =       in_pcbsosetlabel,
-       .pru_close =            tcp_usr_close,
-};
+struct pr_usrreqs tcp6_usrreqs = initialize_with([] (pr_usrreqs& x) {
+       x.pru_abort =           tcp_usr_abort;
+       x.pru_accept =          tcp6_usr_accept;
+       x.pru_attach =          tcp_usr_attach;
+       x.pru_bind =            tcp6_usr_bind;
+       x.pru_connect =         tcp6_usr_connect;
+       x.pru_control =         in6_control;
+       x.pru_detach =          tcp_usr_detach;
+       x.pru_disconnect =      tcp_usr_disconnect;
+       x.pru_listen =          tcp6_usr_listen;
+       x.pru_peeraddr =                in6_mapped_peeraddr;
+       x.pru_rcvd =            tcp_usr_rcvd;
+       x.pru_rcvoob =          tcp_usr_rcvoob;
+       x.pru_send =            tcp_usr_send;
+       x.pru_shutdown =                tcp_usr_shutdown;
+       x.pru_sockaddr =                in6_mapped_sockaddr;
+       x.pru_sosetlabel =      in_pcbsosetlabel;
+       x.pru_close =           tcp_usr_close;
+});
 #endif /* INET6 */
 
 #ifdef INET
@@ -1136,7 +1132,7 @@ tcp6_connect(struct tcpcb *tp, struct bsd_sockaddr *nam, 
struct thread *td)
        INP_HASH_WLOCK(&V_tcbinfo);
 
        if (inp->inp_lport == 0) {
-               error = in6_pcbbind(inp, (struct bsd_sockaddr *)0, 
td->td_ucred);
+               error = in6_pcbbind(inp, (struct bsd_sockaddr *)0, 0);
                if (error)
                        goto out;
        }
diff --git a/bsd/sys/sys/socketvar.h b/bsd/sys/sys/socketvar.h
index b02b9c2..44b9979 100644
--- a/bsd/sys/sys/socketvar.h
+++ b/bsd/sys/sys/socketvar.h
@@ -42,10 +42,11 @@
 #ifdef _KERNEL
 #include <bsd/sys/sys/sockopt.h>
 #endif
-#include <osv/net_channel.hh>
 
 struct vnet;
 
+class net_channel;
+
 /*
  * Kernel structure per socket.
  * Contains send and receive buffer queues,
diff --git a/core/net_channel.cc b/core/net_channel.cc
index 67ba99e..322d434 100644
--- a/core/net_channel.cc
+++ b/core/net_channel.cc
@@ -8,9 +8,17 @@
 #include <osv/net_channel.hh>
 #include <osv/poll.h>
 #include <bsd/sys/sys/mbuf.h>
+#include <bsd/sys/net/if_var.h>
 #include <bsd/sys/net/ethernet.h>
 #include <bsd/sys/netinet/ip.h>
-#include <bsd/sys/netinet/ip.h>
+#ifdef INET6
+#include <bsd/sys/netinet/in.h>
+#include <bsd/sys/netinet/ip6.h>
+#include <bsd/sys/netinet6/in6.h>
+#include <bsd/sys/netinet6/ip6_var.h>
+#include <bsd/sys/compat/linux/linux.h>
+#include <bsd/sys/compat/linux/linux_socket.h>
+#endif /* INET6 */
 #include <bsd/sys/netinet/tcp.h>
 #include <bsd/sys/net/ethernet.h>
 #include <bsd/sys/net/netisr.h>
@@ -25,11 +33,28 @@ std::ostream& operator<<(std::ostream& os, in_addr ia)
             (x >> 24) & 255, (x >> 16) & 255, (x >> 8) & 255, x & 255);
 }
 
-std::ostream& operator<<(std::ostream& os, ipv4_tcp_conn_id id)
+std::ostream& operator<<(std::ostream& os, const ipv4_tcp_conn_id& id)
 {
     return osv::fprintf(os, "{ ipv4 %s:%d -> %s:%d }", id.src_addr, 
id.src_port, id.dst_addr, id.dst_port);
 }
 
+#ifdef INET6
+
+std::ostream& operator<<(std::ostream& os, in6_addr ia)
+{
+    char ipstr[INET6_ADDRSTRLEN];
+    if (inet_ntop(LINUX_AF_INET6, ia.s6_addr, ipstr, sizeof(ipstr)) == 0)
+        return os << "????";
+    return  os << ipstr;
+}
+
+std::ostream& operator<<(std::ostream& os, const ipv6_tcp_conn_id& id)
+{
+    return osv::fprintf(os, "{ ipv6 %s:%d -> %s:%d }", id.src_addr, id.src_port, id.dst_addr, id.dst_port);
+}
+
+#endif
+
 void net_channel::process_queue()
 {
     mbuf* m;
@@ -109,27 +134,48 @@ classifier::classifier()
 {
 }
 
-void classifier::add(ipv4_tcp_conn_id id, net_channel* channel)
+void classifier::add(const ipv4_tcp_conn_id& id, net_channel* channel)
 {
     WITH_LOCK(_mtx) {
         _ipv4_tcp_channels.emplace(id, channel);
     }
 }
 
-void classifier::remove(ipv4_tcp_conn_id id)
+void classifier::remove(const ipv4_tcp_conn_id& id)
 {
     WITH_LOCK(_mtx) {
         auto i = _ipv4_tcp_channels.owner_find(id,
-                std::hash<ipv4_tcp_conn_id>(), key_item_compare());
+                std::hash<ipv4_tcp_conn_id>(), 
key_item_compare<ipv4_tcp_conn_id>());
         assert(i);
         _ipv4_tcp_channels.erase(i);
     }
 }
 
+#ifdef INET6
+
+void classifier::add(const ipv6_tcp_conn_id& id, net_channel* channel)
+{
+    WITH_LOCK(_mtx) {
+        _ipv6_tcp_channels.emplace(id, channel);
+    }
+}
+
+void classifier::remove(const ipv6_tcp_conn_id& id)
+{
+    WITH_LOCK(_mtx) {
+        auto i = _ipv6_tcp_channels.owner_find(id,
+                std::hash<ipv6_tcp_conn_id>(), key_item_compare<ipv6_tcp_conn_id>());
+        assert(i);
+        _ipv6_tcp_channels.erase(i);
+    }
+}
+
+#endif /* INET6 */
+
 bool classifier::post_packet(mbuf* m)
 {
     WITH_LOCK(osv::rcu_read_lock) {
-        if (auto nc = classify_ipv4_tcp(m)) {
+        if (auto nc = classify_packet(m)) {
             log_packet_in(m, NETISR_ETHER);
             if (!nc->push(m)) {
                 return false;
@@ -143,20 +189,33 @@ bool classifier::post_packet(mbuf* m)
 }
 
 // must be called with rcu lock held
-net_channel* classifier::classify_ipv4_tcp(mbuf* m)
+net_channel* classifier::classify_packet(mbuf* m)
 {
-    caddr_t h = m->m_hdr.mh_data;
-    if (unsigned(m->m_hdr.mh_len) < ETHER_HDR_LEN + sizeof(ip)) {
+    if (unsigned(m->m_hdr.mh_len) < ETHER_HDR_LEN)
+        return nullptr;
+    auto ether_hdr = mtod(m, struct ether_header*);
+    uint8_t *payload = (uint8_t *)(ether_hdr) + sizeof(*ether_hdr);
+    size_t payload_size = m->m_hdr.mh_len - sizeof(*ether_hdr);
+    switch(ntohs(ether_hdr->ether_type)) {
+    case ETHERTYPE_IP:
+        return classify_ipv4_tcp(m, reinterpret_cast<ip*>(payload), payload_size);
+#ifdef INET6
+    case ETHERTYPE_IPV6:
+        return classify_ipv6_tcp(m, reinterpret_cast<ip6_hdr*>(payload), payload_size);
+#endif
+    default:
         return nullptr;
     }
-    auto ether_hdr = reinterpret_cast<ether_header*>(h);
-    if (ntohs(ether_hdr->ether_type) != ETHERTYPE_IP) {
+}
+
+// must be called with rcu lock held
+net_channel* classifier::classify_ipv4_tcp(mbuf* m, ip *ip_hdr, size_t ip_len)
+{
+    if (ip_len < sizeof(*ip_hdr)) {
         return nullptr;
     }
-    h += ETHER_HDR_LEN;
-    auto ip_hdr = reinterpret_cast<ip*>(h);
-    unsigned ip_size = ip_hdr->ip_hl << 2;
-    if (ip_size < sizeof(ip)) {
+    unsigned ip_hdr_len = ip_hdr->ip_hl << 2;
+    if (ip_hdr_len < sizeof(ip)) {
         return nullptr;
     }
     if (ip_hdr->ip_p != IPPROTO_TCP) {
@@ -165,20 +224,98 @@ net_channel* classifier::classify_ipv4_tcp(mbuf* m)
     if (ntohs(ip_hdr->ip_off) & ~IP_DF) {
         return nullptr;
     }
-    auto src_addr = ip_hdr->ip_src;
-    auto dst_addr = ip_hdr->ip_dst;
-    h += ip_size;
-    auto tcp_hdr = reinterpret_cast<tcphdr*>(h);
+
+    auto tcp_hdr = reinterpret_cast<tcphdr*>(reinterpret_cast<uint8_t *>(ip_hdr) + ip_hdr_len);
     if (tcp_hdr->th_flags & (TH_SYN | TH_FIN | TH_RST)) {
            return nullptr;
     }
-    auto src_port = ntohs(tcp_hdr->th_sport);
-    auto dst_port = ntohs(tcp_hdr->th_dport);
-    auto id = ipv4_tcp_conn_id{src_addr, dst_addr, src_port, dst_port};
+    ipv4_tcp_conn_id id;
+    id.src_addr = ip_hdr->ip_src;
+    id.dst_addr = ip_hdr->ip_dst;
+    id.src_port = ntohs(tcp_hdr->th_sport);
+    id.dst_port = ntohs(tcp_hdr->th_dport);
     auto i = _ipv4_tcp_channels.reader_find(id,
-            std::hash<ipv4_tcp_conn_id>(), key_item_compare());
+            std::hash<ipv4_tcp_conn_id>(), 
key_item_compare<ipv4_tcp_conn_id>());
+    if (!i) {
+        return nullptr;
+    }
+    return i->chan;
+}
+
+#ifdef INET6
+
+/* get offset for the last header in the chain unless packet is fragmented.
+ * m will be kept untainted.
+ *
+ * netchannel code doesn't handle fragmented packets, so these need to go
+ * on the slow path.
+ *
+ * This code is based on the FreeBSD ip6_lasthdr() function.
+ */
+static int
+ip6_lasthdr_nofrag(struct mbuf *m, int off, int proto, int *nxtp)
+{
+    int newoff;
+    int nxt;
+
+    if (!nxtp) {
+        nxt = -1;
+        nxtp = &nxt;
+    }
+    while (1) {
+        newoff = ip6_nexthdr(m, off, proto, nxtp);
+        if (newoff < 0)
+            return off;
+        else if (newoff < off)
+            return -1;      /* invalid */
+        else if (newoff == off)
+            return newoff;
+
+        off = newoff;
+        proto = *nxtp;
+
+        if (proto == IPPROTO_FRAGMENT)
+             return -1;
+    }
+}
+
+
+// must be called with rcu lock held
+net_channel* classifier::classify_ipv6_tcp(mbuf* m, ip6_hdr *ip_hdr, size_t ip_len)
+{
+    int nxt;
+    int nxt_off;
+    int ip_off;
+    uint8_t *start;
+
+    if (ip_len < sizeof(*ip_hdr)) {
+        return nullptr;
+    }
+    start = mtod(m, uint8_t*);
+    ip_off = (uintptr_t)(reinterpret_cast<uint8_t*>(ip_hdr) - start);
+    nxt_off = ip6_lasthdr_nofrag(m, ip_off, IPPROTO_IPV6, &nxt);
+    if (nxt_off < 0 || nxt != IPPROTO_TCP) {
+        return nullptr;
+    }
+
+    auto tcp_hdr = reinterpret_cast<tcphdr*>(start + nxt_off);
+    if (tcp_hdr->th_flags & (TH_SYN | TH_FIN | TH_RST)) {
+           return nullptr;
+    }
+
+    ipv6_tcp_conn_id id;
+    id.src_addr = ip_hdr->ip6_src;
+    id.dst_addr = ip_hdr->ip6_dst;
+    id.src_port = ntohs(tcp_hdr->th_sport);
+    id.dst_port = ntohs(tcp_hdr->th_dport);
+
+    auto i = _ipv6_tcp_channels.reader_find(id,
+            std::hash<ipv6_tcp_conn_id>(), key_item_compare<ipv6_tcp_conn_id>());
     if (!i) {
         return nullptr;
     }
     return i->chan;
 }
+
+#endif /* INET6 */
+
diff --git a/drivers/virtio-net.cc b/drivers/virtio-net.cc
index b820c36..5ba4d23 100644
--- a/drivers/virtio-net.cc
+++ b/drivers/virtio-net.cc
@@ -492,7 +492,7 @@ void net::receiver()
             rx_packets++;
             rx_bytes += m_head->M_dat.MH.MH_pkthdr.len;
 
-            bool fast_path = _ifn->if_classifier.post_packet(m_head);
+            bool fast_path = if_net_channel_input(_ifn, m_head);
             if (!fast_path) {
                 (*_ifn->if_input)(_ifn, m_head);
             }
diff --git a/drivers/vmxnet3.cc b/drivers/vmxnet3.cc
index 3b4c0cc..fff932b 100644
--- a/drivers/vmxnet3.cc
+++ b/drivers/vmxnet3.cc
@@ -925,7 +925,7 @@ void vmxnet3_rxqueue::input(vmxnet3_rx_compdesc *rxcd, 
struct mbuf *m)
         checksum(rxcd, m);
     stats.rx_packets++;
     stats.rx_bytes += m->M_dat.MH.MH_pkthdr.len;
-    bool fast_path = _ifn->if_classifier.post_packet(m);
+    bool fast_path = if_net_channel_input(_ifn, m);
     if (!fast_path) {
         (*_ifn->if_input)(_ifn, m);
     }
diff --git a/include/osv/net_channel.hh b/include/osv/net_channel.hh
index 2784e9e..b505f48 100644
--- a/include/osv/net_channel.hh
+++ b/include/osv/net_channel.hh
@@ -20,6 +20,10 @@
 #include <bsd/sys/netinet/ip.h>
 #include <osv/file.h>
 
+#ifdef INET6
+#include <bsd/sys/netinet/ip6.h>
+#endif /* INET6 */
+
 struct mbuf;
 struct pollreq;
 
@@ -80,9 +84,6 @@ public:
 }
 
 struct ipv4_tcp_conn_id {
-    ipv4_tcp_conn_id(in_addr src_addr, in_addr dst_addr, in_port_t src_port, 
in_port_t dst_port)
-        : src_addr(src_addr), dst_addr(dst_addr), src_port(src_port), 
dst_port(dst_port) {}
-
     in_addr src_addr;
     in_addr dst_addr;
     in_port_t src_port;
@@ -100,6 +101,32 @@ struct ipv4_tcp_conn_id {
     }
 };
 
+#ifdef INET6
+
+struct ipv6_tcp_conn_id {
+    in6_addr src_addr;
+    in6_addr dst_addr;
+    in_port_t src_port;
+    in_port_t dst_port;
+
+    static uint32_t hash_in6_addr(const in6_addr &addr) {
+        uint32_t *a = (uint32_t*) &addr.s6_addr;
+        return ( a[0] ^ a[1] ^ a[2] ^ a[3] );
+    }
+    size_t hash() const {
+        // FIXME: protection against hash attacks?
+        return hash_in6_addr(src_addr) ^ hash_in6_addr(dst_addr) ^ src_port ^ dst_port;
+    }
+    bool operator==(const ipv6_tcp_conn_id& x) const {
+        return memcmp(&src_addr, &x.src_addr, sizeof(src_addr)) == 0
+            && memcmp(&dst_addr, &x.dst_addr, sizeof(dst_addr)) == 0
+            && src_port == x.src_port
+            && dst_port == x.dst_port;
+    }
+};
+
+#endif /* INET6 */
+
 namespace std {
 
 template <>
@@ -107,35 +134,61 @@ struct hash<ipv4_tcp_conn_id> {
     size_t operator()(ipv4_tcp_conn_id x) const { return x.hash(); }
 };
 
+#ifdef INET6
+
+template <>
+struct hash<ipv6_tcp_conn_id> {
+    size_t operator()(ipv6_tcp_conn_id x) const { return x.hash(); }
+};
+
+#endif /* INET6 */
+
 }
 
 class classifier {
 public:
     classifier();
     // consumer side operations
-    void add(ipv4_tcp_conn_id id, net_channel* channel);
-    void remove(ipv4_tcp_conn_id id);
+    void add(const ipv4_tcp_conn_id& id, net_channel* channel);
+    void remove(const ipv4_tcp_conn_id& id);
+#ifdef INET6
+    void add(const ipv6_tcp_conn_id& id, net_channel* channel);
+    void remove(const ipv6_tcp_conn_id& id);
+#endif /* INET6 */
+
     // producer side operations
     bool post_packet(mbuf* m);
 private:
-    net_channel* classify_ipv4_tcp(mbuf* m);
+    net_channel* classify_packet(mbuf* m);
+    net_channel* classify_ipv4_tcp(mbuf* m, struct ip* ip, size_t ip_len);
+#ifdef INET6
+    net_channel* classify_ipv6_tcp(mbuf* m, struct ip6_hdr* ip, size_t ip_len);
+#endif /* INET6 */
+
 private:
+    template <class KeyType>
     struct item {
-        item(const ipv4_tcp_conn_id& key, net_channel* chan) : key(key), 
chan(chan) {}
-        ipv4_tcp_conn_id key;
+        item(const KeyType& key, net_channel* chan) : key(key), chan(chan) {}
+        KeyType key;
         net_channel* chan;
     };
-    struct item_hash : private std::hash<ipv4_tcp_conn_id> {
-        size_t operator()(const item& i) const { return 
std::hash<ipv4_tcp_conn_id>::operator()(i.key); }
+    template <class KeyType>
+    struct item_hash : private std::hash<KeyType> {
+        size_t operator()(const item<KeyType>& i) const { return std::hash<KeyType>::operator()(i.key); }
     };
+    template <class KeyType>
     struct key_item_compare {
-        bool operator()(const ipv4_tcp_conn_id& key, const item& item) const {
+        bool operator()(const KeyType& key, const item<KeyType>& item) const {
             return key == item.key;
         }
     };
-    using ipv4_tcp_channels = osv::rcu_hashtable<item, item_hash>;
     mutex _mtx;
+    using ipv4_tcp_channels = osv::rcu_hashtable<item<ipv4_tcp_conn_id>, item_hash<ipv4_tcp_conn_id>>;
     ipv4_tcp_channels _ipv4_tcp_channels;
+#ifdef INET6
+    using ipv6_tcp_channels = osv::rcu_hashtable<item<ipv6_tcp_conn_id>, item_hash<ipv6_tcp_conn_id>>;
+    ipv6_tcp_channels _ipv6_tcp_channels;
+#endif /* INET6 */
 };
 
 #endif /* NETCHANNEL_HH_ */
-- 
2.7.4

-- 
You received this message because you are subscribed to the Google Groups "OSv Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+unsubscr...@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

Reply via email to