Module Name: src
Committed By: ozaki-r
Date: Tue Sep 20 07:19:15 UTC 2022
Modified Files:
src/distrib/sets/lists/comp: mi
src/sys/netinet: Makefile files.netinet tcp_input.c tcp_subr.c
tcp_usrreq.c tcp_var.h
src/sys/rump/net/lib/libnetinet: Makefile.inc
Added Files:
src/sys/netinet: tcp_syncache.c tcp_syncache.h
Log Message:
tcp: separate the syn cache code into tcp_syncache.[ch] files
No functional change.
To generate a diff of this commit:
cvs rdiff -u -r1.2423 -r1.2424 src/distrib/sets/lists/comp/mi
cvs rdiff -u -r1.30 -r1.31 src/sys/netinet/Makefile
cvs rdiff -u -r1.29 -r1.30 src/sys/netinet/files.netinet
cvs rdiff -u -r1.433 -r1.434 src/sys/netinet/tcp_input.c
cvs rdiff -u -r1.290 -r1.291 src/sys/netinet/tcp_subr.c
cvs rdiff -u -r0 -r1.1 src/sys/netinet/tcp_syncache.c \
src/sys/netinet/tcp_syncache.h
cvs rdiff -u -r1.231 -r1.232 src/sys/netinet/tcp_usrreq.c
cvs rdiff -u -r1.196 -r1.197 src/sys/netinet/tcp_var.h
cvs rdiff -u -r1.15 -r1.16 src/sys/rump/net/lib/libnetinet/Makefile.inc
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/distrib/sets/lists/comp/mi
diff -u src/distrib/sets/lists/comp/mi:1.2423 src/distrib/sets/lists/comp/mi:1.2424
--- src/distrib/sets/lists/comp/mi:1.2423 Sat Sep 10 15:50:57 2022
+++ src/distrib/sets/lists/comp/mi Tue Sep 20 07:19:14 2022
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.2423 2022/09/10 15:50:57 rillig Exp $
+# $NetBSD: mi,v 1.2424 2022/09/20 07:19:14 ozaki-r Exp $
#
# Note: don't delete entries from here - mark them as "obsolete" instead.
./etc/mtree/set.comp comp-sys-root
@@ -2745,6 +2745,7 @@
./usr/include/netinet/tcp_debug.h comp-c-include
./usr/include/netinet/tcp_fsm.h comp-c-include
./usr/include/netinet/tcp_seq.h comp-c-include
+./usr/include/netinet/tcp_syncache.h comp-c-include
./usr/include/netinet/tcp_timer.h comp-c-include
./usr/include/netinet/tcp_var.h comp-c-include
./usr/include/netinet/tcp_vtw.h comp-c-include
Index: src/sys/netinet/Makefile
diff -u src/sys/netinet/Makefile:1.30 src/sys/netinet/Makefile:1.31
--- src/sys/netinet/Makefile:1.30 Thu Sep 6 06:42:00 2018
+++ src/sys/netinet/Makefile Tue Sep 20 07:19:14 2022
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.30 2018/09/06 06:42:00 maxv Exp $
+# $NetBSD: Makefile,v 1.31 2022/09/20 07:19:14 ozaki-r Exp $
INCSDIR= /usr/include/netinet
@@ -8,8 +8,8 @@ INCS= dccp.h icmp6.h icmp_var.h if_ether
in_var.h ip.h ip_carp.h ip6.h ip_ecn.h ip_encap.h \
ip_icmp.h ip_mroute.h ip_var.h pim.h pim_var.h portalgo.h \
sctp.h sctp_uio.h \
- tcp.h tcp_debug.h tcp_fsm.h tcp_seq.h tcp_timer.h tcp_var.h \
- tcpip.h udp.h udp_var.h \
+ tcp.h tcp_debug.h tcp_fsm.h tcp_seq.h tcp_syncache.h tcp_timer.h \
+ tcp_var.h tcpip.h udp.h udp_var.h \
tcp_vtw.h
# ipfilter headers
Index: src/sys/netinet/files.netinet
diff -u src/sys/netinet/files.netinet:1.29 src/sys/netinet/files.netinet:1.30
--- src/sys/netinet/files.netinet:1.29 Mon Mar 8 18:03:25 2021
+++ src/sys/netinet/files.netinet Tue Sep 20 07:19:14 2022
@@ -1,4 +1,4 @@
-# $NetBSD: files.netinet,v 1.29 2021/03/08 18:03:25 christos Exp $
+# $NetBSD: files.netinet,v 1.30 2022/09/20 07:19:14 ozaki-r Exp $
defflag opt_tcp_debug.h TCP_DEBUG
defparam opt_tcp_debug.h TCP_NDEBUG
@@ -45,6 +45,7 @@ file netinet/tcp_input.c inet | inet6
file netinet/tcp_output.c inet | inet6
file netinet/tcp_sack.c inet | inet6
file netinet/tcp_subr.c inet | inet6
+file netinet/tcp_syncache.c inet | inet6
file netinet/tcp_timer.c inet | inet6
file netinet/tcp_usrreq.c inet | inet6
file netinet/tcp_congctl.c inet | inet6
Index: src/sys/netinet/tcp_input.c
diff -u src/sys/netinet/tcp_input.c:1.433 src/sys/netinet/tcp_input.c:1.434
--- src/sys/netinet/tcp_input.c:1.433 Tue May 24 20:50:20 2022
+++ src/sys/netinet/tcp_input.c Tue Sep 20 07:19:14 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: tcp_input.c,v 1.433 2022/05/24 20:50:20 andvar Exp $ */
+/* $NetBSD: tcp_input.c,v 1.434 2022/09/20 07:19:14 ozaki-r Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -137,18 +137,8 @@
* @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
*/
-/*
- * TODO list for SYN cache stuff:
- *
- * Find room for a "state" field, which is needed to keep a
- * compressed state for TIME_WAIT TCBs. It's been noted already
- * that this is fairly important for very high-volume web and
- * mail servers, which use a large number of short-lived
- * connections.
- */
-
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.433 2022/05/24 20:50:20 andvar Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.434 2022/09/20 07:19:14 ozaki-r Exp $");
#ifdef _KERNEL_OPT
#include "opt_inet.h"
@@ -214,6 +204,7 @@ __KERNEL_RCSID(0, "$NetBSD: tcp_input.c,
#include <netinet/tcp_private.h>
#include <netinet/tcp_congctl.h>
#include <netinet/tcp_debug.h>
+#include <netinet/tcp_syncache.h>
#ifdef INET6
#include "faith.h"
@@ -245,8 +236,6 @@ static struct timeval tcp_rst_ppslim_las
static int tcp_ackdrop_ppslim_count = 0;
static struct timeval tcp_ackdrop_ppslim_last;
-static void syn_cache_timer(void *);
-
#define TCP_PAWS_IDLE (24U * 24 * 60 * 60 * PR_SLOWHZ)
/* for modulo comparisons of timestamps */
@@ -426,8 +415,6 @@ extern struct evcnt tcp_reass_fragdup;
static int tcp_reass(struct tcpcb *, const struct tcphdr *, struct mbuf *,
int);
-static int tcp_dooptions(struct tcpcb *, const u_char *, int,
- struct tcphdr *, struct mbuf *, int, struct tcp_opt_info *);
static void tcp4_log_refused(const struct ip *, const struct tcphdr *);
#ifdef INET6
@@ -3155,7 +3142,7 @@ tcp_signature(struct mbuf *m, struct tcp
* Returns -1 if this segment should be dropped. (eg. wrong signature)
* Otherwise returns 0.
*/
-static int
+int
tcp_dooptions(struct tcpcb *tp, const u_char *cp, int cnt, struct tcphdr *th,
struct mbuf *m, int toff, struct tcp_opt_info *oi)
{
@@ -3470,1172 +3457,3 @@ tcp_xmit_timer(struct tcpcb *tp, uint32_
*/
tp->t_softerror = 0;
}
-
-
-/*
- * TCP compressed state engine. Currently used to hold compressed
- * state for SYN_RECEIVED.
- */
-
-u_long syn_cache_count;
-u_int32_t syn_hash1, syn_hash2;
-
-#define SYN_HASH(sa, sp, dp) \
- ((((sa)->s_addr^syn_hash1)*(((((u_int32_t)(dp))<<16) + \
- ((u_int32_t)(sp)))^syn_hash2)))
-#ifndef INET6
-#define SYN_HASHALL(hash, src, dst) \
-do { \
- hash = SYN_HASH(&((const struct sockaddr_in *)(src))->sin_addr, \
- ((const struct sockaddr_in *)(src))->sin_port, \
- ((const struct sockaddr_in *)(dst))->sin_port); \
-} while (/*CONSTCOND*/ 0)
-#else
-#define SYN_HASH6(sa, sp, dp) \
- ((((sa)->s6_addr32[0] ^ (sa)->s6_addr32[3] ^ syn_hash1) * \
- (((((u_int32_t)(dp))<<16) + ((u_int32_t)(sp)))^syn_hash2)) \
- & 0x7fffffff)
-
-#define SYN_HASHALL(hash, src, dst) \
-do { \
- switch ((src)->sa_family) { \
- case AF_INET: \
- hash = SYN_HASH(&((const struct sockaddr_in *)(src))->sin_addr, \
- ((const struct sockaddr_in *)(src))->sin_port, \
- ((const struct sockaddr_in *)(dst))->sin_port); \
- break; \
- case AF_INET6: \
- hash = SYN_HASH6(&((const struct sockaddr_in6 *)(src))->sin6_addr, \
- ((const struct sockaddr_in6 *)(src))->sin6_port, \
- ((const struct sockaddr_in6 *)(dst))->sin6_port); \
- break; \
- default: \
- hash = 0; \
- } \
-} while (/*CONSTCOND*/0)
-#endif /* INET6 */
-
-static struct pool syn_cache_pool;
-
-/*
- * We don't estimate RTT with SYNs, so each packet starts with the default
- * RTT and each timer step has a fixed timeout value.
- */
-static inline void
-syn_cache_timer_arm(struct syn_cache *sc)
-{
-
- TCPT_RANGESET(sc->sc_rxtcur,
- TCPTV_SRTTDFLT * tcp_backoff[sc->sc_rxtshift], TCPTV_MIN,
- TCPTV_REXMTMAX);
- callout_reset(&sc->sc_timer,
- sc->sc_rxtcur * (hz / PR_SLOWHZ), syn_cache_timer, sc);
-}
-
-#define SYN_CACHE_TIMESTAMP(sc) (tcp_now - (sc)->sc_timebase)
-
-static inline void
-syn_cache_rm(struct syn_cache *sc)
-{
- TAILQ_REMOVE(&tcp_syn_cache[sc->sc_bucketidx].sch_bucket,
- sc, sc_bucketq);
- sc->sc_tp = NULL;
- LIST_REMOVE(sc, sc_tpq);
- tcp_syn_cache[sc->sc_bucketidx].sch_length--;
- callout_stop(&sc->sc_timer);
- syn_cache_count--;
-}
-
-static inline void
-syn_cache_put(struct syn_cache *sc)
-{
- if (sc->sc_ipopts)
- (void) m_free(sc->sc_ipopts);
- rtcache_free(&sc->sc_route);
- sc->sc_flags |= SCF_DEAD;
- if (!callout_invoking(&sc->sc_timer))
- callout_schedule(&(sc)->sc_timer, 1);
-}
-
-void
-syn_cache_init(void)
-{
- int i;
-
- pool_init(&syn_cache_pool, sizeof(struct syn_cache), 0, 0, 0,
- "synpl", NULL, IPL_SOFTNET);
-
- /* Initialize the hash buckets. */
- for (i = 0; i < tcp_syn_cache_size; i++)
- TAILQ_INIT(&tcp_syn_cache[i].sch_bucket);
-}
-
-void
-syn_cache_insert(struct syn_cache *sc, struct tcpcb *tp)
-{
- struct syn_cache_head *scp;
- struct syn_cache *sc2;
- int s;
-
- /*
- * If there are no entries in the hash table, reinitialize
- * the hash secrets.
- */
- if (syn_cache_count == 0) {
- syn_hash1 = cprng_fast32();
- syn_hash2 = cprng_fast32();
- }
-
- SYN_HASHALL(sc->sc_hash, &sc->sc_src.sa, &sc->sc_dst.sa);
- sc->sc_bucketidx = sc->sc_hash % tcp_syn_cache_size;
- scp = &tcp_syn_cache[sc->sc_bucketidx];
-
- /*
- * Make sure that we don't overflow the per-bucket
- * limit or the total cache size limit.
- */
- s = splsoftnet();
- if (scp->sch_length >= tcp_syn_bucket_limit) {
- TCP_STATINC(TCP_STAT_SC_BUCKETOVERFLOW);
- /*
- * The bucket is full. Toss the oldest element in the
- * bucket. This will be the first entry in the bucket.
- */
- sc2 = TAILQ_FIRST(&scp->sch_bucket);
-#ifdef DIAGNOSTIC
- /*
- * This should never happen; we should always find an
- * entry in our bucket.
- */
- if (sc2 == NULL)
- panic("syn_cache_insert: bucketoverflow: impossible");
-#endif
- syn_cache_rm(sc2);
- syn_cache_put(sc2); /* calls pool_put but see spl above */
- } else if (syn_cache_count >= tcp_syn_cache_limit) {
- struct syn_cache_head *scp2, *sce;
-
- TCP_STATINC(TCP_STAT_SC_OVERFLOWED);
- /*
- * The cache is full. Toss the oldest entry in the
- * first non-empty bucket we can find.
- *
- * XXX We would really like to toss the oldest
- * entry in the cache, but we hope that this
- * condition doesn't happen very often.
- */
- scp2 = scp;
- if (TAILQ_EMPTY(&scp2->sch_bucket)) {
- sce = &tcp_syn_cache[tcp_syn_cache_size];
- for (++scp2; scp2 != scp; scp2++) {
- if (scp2 >= sce)
- scp2 = &tcp_syn_cache[0];
- if (! TAILQ_EMPTY(&scp2->sch_bucket))
- break;
- }
-#ifdef DIAGNOSTIC
- /*
- * This should never happen; we should always find a
- * non-empty bucket.
- */
- if (scp2 == scp)
- panic("syn_cache_insert: cacheoverflow: "
- "impossible");
-#endif
- }
- sc2 = TAILQ_FIRST(&scp2->sch_bucket);
- syn_cache_rm(sc2);
- syn_cache_put(sc2); /* calls pool_put but see spl above */
- }
-
- /*
- * Initialize the entry's timer.
- */
- sc->sc_rxttot = 0;
- sc->sc_rxtshift = 0;
- syn_cache_timer_arm(sc);
-
- /* Link it from tcpcb entry */
- LIST_INSERT_HEAD(&tp->t_sc, sc, sc_tpq);
-
- /* Put it into the bucket. */
- TAILQ_INSERT_TAIL(&scp->sch_bucket, sc, sc_bucketq);
- scp->sch_length++;
- syn_cache_count++;
-
- TCP_STATINC(TCP_STAT_SC_ADDED);
- splx(s);
-}
-
-/*
- * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted.
- * If we have retransmitted an entry the maximum number of times, expire
- * that entry.
- */
-static void
-syn_cache_timer(void *arg)
-{
- struct syn_cache *sc = arg;
-
- mutex_enter(softnet_lock);
- KERNEL_LOCK(1, NULL);
-
- callout_ack(&sc->sc_timer);
-
- if (__predict_false(sc->sc_flags & SCF_DEAD)) {
- TCP_STATINC(TCP_STAT_SC_DELAYED_FREE);
- goto free;
- }
-
- if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) {
- /* Drop it -- too many retransmissions. */
- goto dropit;
- }
-
- /*
- * Compute the total amount of time this entry has
- * been on a queue. If this entry has been on longer
- * than the keep alive timer would allow, expire it.
- */
- sc->sc_rxttot += sc->sc_rxtcur;
- if (sc->sc_rxttot >= MIN(tcp_keepinit, TCP_TIMER_MAXTICKS))
- goto dropit;
-
- TCP_STATINC(TCP_STAT_SC_RETRANSMITTED);
- (void)syn_cache_respond(sc);
-
- /* Advance the timer back-off. */
- sc->sc_rxtshift++;
- syn_cache_timer_arm(sc);
-
- goto out;
-
- dropit:
- TCP_STATINC(TCP_STAT_SC_TIMED_OUT);
- syn_cache_rm(sc);
- if (sc->sc_ipopts)
- (void) m_free(sc->sc_ipopts);
- rtcache_free(&sc->sc_route);
-
- free:
- callout_destroy(&sc->sc_timer);
- pool_put(&syn_cache_pool, sc);
-
- out:
- KERNEL_UNLOCK_ONE(NULL);
- mutex_exit(softnet_lock);
-}
-
-/*
- * Remove syn cache created by the specified tcb entry,
- * because this does not make sense to keep them
- * (if there's no tcb entry, syn cache entry will never be used)
- */
-void
-syn_cache_cleanup(struct tcpcb *tp)
-{
- struct syn_cache *sc, *nsc;
- int s;
-
- s = splsoftnet();
-
- for (sc = LIST_FIRST(&tp->t_sc); sc != NULL; sc = nsc) {
- nsc = LIST_NEXT(sc, sc_tpq);
-
-#ifdef DIAGNOSTIC
- if (sc->sc_tp != tp)
- panic("invalid sc_tp in syn_cache_cleanup");
-#endif
- syn_cache_rm(sc);
- syn_cache_put(sc); /* calls pool_put but see spl above */
- }
- /* just for safety */
- LIST_INIT(&tp->t_sc);
-
- splx(s);
-}
-
-/*
- * Find an entry in the syn cache.
- */
-struct syn_cache *
-syn_cache_lookup(const struct sockaddr *src, const struct sockaddr *dst,
- struct syn_cache_head **headp)
-{
- struct syn_cache *sc;
- struct syn_cache_head *scp;
- u_int32_t hash;
- int s;
-
- SYN_HASHALL(hash, src, dst);
-
- scp = &tcp_syn_cache[hash % tcp_syn_cache_size];
- *headp = scp;
- s = splsoftnet();
- for (sc = TAILQ_FIRST(&scp->sch_bucket); sc != NULL;
- sc = TAILQ_NEXT(sc, sc_bucketq)) {
- if (sc->sc_hash != hash)
- continue;
- if (!memcmp(&sc->sc_src, src, src->sa_len) &&
- !memcmp(&sc->sc_dst, dst, dst->sa_len)) {
- splx(s);
- return (sc);
- }
- }
- splx(s);
- return (NULL);
-}
-
-/*
- * This function gets called when we receive an ACK for a socket in the
- * LISTEN state. We look up the connection in the syn cache, and if it's
- * there, we pull it out of the cache and turn it into a full-blown
- * connection in the SYN-RECEIVED state.
- *
- * The return values may not be immediately obvious, and their effects
- * can be subtle, so here they are:
- *
- * NULL SYN was not found in cache; caller should drop the
- * packet and send an RST.
- *
- * -1 We were unable to create the new connection, and are
- * aborting it. An ACK,RST is being sent to the peer
- * (unless we got screwey sequence numbers; see below),
- * because the 3-way handshake has been completed. Caller
- * should not free the mbuf, since we may be using it. If
- * we are not, we will free it.
- *
- * Otherwise, the return value is a pointer to the new socket
- * associated with the connection.
- */
-struct socket *
-syn_cache_get(struct sockaddr *src, struct sockaddr *dst,
- struct tcphdr *th, struct socket *so, struct mbuf *m)
-{
- struct syn_cache *sc;
- struct syn_cache_head *scp;
- struct inpcb *inp = NULL;
-#ifdef INET6
- struct in6pcb *in6p = NULL;
-#endif
- struct tcpcb *tp;
- int s;
- struct socket *oso;
-
- s = splsoftnet();
- if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) {
- splx(s);
- return NULL;
- }
-
- /*
- * Verify the sequence and ack numbers. Try getting the correct
- * response again.
- */
- if ((th->th_ack != sc->sc_iss + 1) ||
- SEQ_LEQ(th->th_seq, sc->sc_irs) ||
- SEQ_GT(th->th_seq, sc->sc_irs + 1 + sc->sc_win)) {
- m_freem(m);
- (void)syn_cache_respond(sc);
- splx(s);
- return ((struct socket *)(-1));
- }
-
- /* Remove this cache entry */
- syn_cache_rm(sc);
- splx(s);
-
- /*
- * Ok, create the full blown connection, and set things up
- * as they would have been set up if we had created the
- * connection when the SYN arrived. If we can't create
- * the connection, abort it.
- */
- /*
- * inp still has the OLD in_pcb stuff, set the
- * v6-related flags on the new guy, too. This is
- * done particularly for the case where an AF_INET6
- * socket is bound only to a port, and a v4 connection
- * comes in on that port.
- * we also copy the flowinfo from the original pcb
- * to the new one.
- */
- oso = so;
- so = sonewconn(so, true);
- if (so == NULL)
- goto resetandabort;
-
- switch (so->so_proto->pr_domain->dom_family) {
- case AF_INET:
- inp = sotoinpcb(so);
- break;
-#ifdef INET6
- case AF_INET6:
- in6p = sotoin6pcb(so);
- break;
-#endif
- }
-
- switch (src->sa_family) {
- case AF_INET:
- if (inp) {
- inp->inp_laddr = ((struct sockaddr_in *)dst)->sin_addr;
- inp->inp_lport = ((struct sockaddr_in *)dst)->sin_port;
- inp->inp_options = ip_srcroute(m);
- in_pcbstate(inp, INP_BOUND);
- if (inp->inp_options == NULL) {
- inp->inp_options = sc->sc_ipopts;
- sc->sc_ipopts = NULL;
- }
- }
-#ifdef INET6
- else if (in6p) {
- /* IPv4 packet to AF_INET6 socket */
- memset(&in6p->in6p_laddr, 0, sizeof(in6p->in6p_laddr));
- in6p->in6p_laddr.s6_addr16[5] = htons(0xffff);
- bcopy(&((struct sockaddr_in *)dst)->sin_addr,
- &in6p->in6p_laddr.s6_addr32[3],
- sizeof(((struct sockaddr_in *)dst)->sin_addr));
- in6p->in6p_lport = ((struct sockaddr_in *)dst)->sin_port;
- in6totcpcb(in6p)->t_family = AF_INET;
- if (sotoin6pcb(oso)->in6p_flags & IN6P_IPV6_V6ONLY)
- in6p->in6p_flags |= IN6P_IPV6_V6ONLY;
- else
- in6p->in6p_flags &= ~IN6P_IPV6_V6ONLY;
- in6_pcbstate(in6p, IN6P_BOUND);
- }
-#endif
- break;
-#ifdef INET6
- case AF_INET6:
- if (in6p) {
- in6p->in6p_laddr = ((struct sockaddr_in6 *)dst)->sin6_addr;
- in6p->in6p_lport = ((struct sockaddr_in6 *)dst)->sin6_port;
- in6_pcbstate(in6p, IN6P_BOUND);
- }
- break;
-#endif
- }
-
-#ifdef INET6
- if (in6p && in6totcpcb(in6p)->t_family == AF_INET6 && sotoinpcb(oso)) {
- struct in6pcb *oin6p = sotoin6pcb(oso);
- /* inherit socket options from the listening socket */
- in6p->in6p_flags |= (oin6p->in6p_flags & IN6P_CONTROLOPTS);
- if (in6p->in6p_flags & IN6P_CONTROLOPTS) {
- m_freem(in6p->in6p_options);
- in6p->in6p_options = NULL;
- }
- ip6_savecontrol(in6p, &in6p->in6p_options,
- mtod(m, struct ip6_hdr *), m);
- }
-#endif
-
- /*
- * Give the new socket our cached route reference.
- */
- if (inp) {
- rtcache_copy(&inp->inp_route, &sc->sc_route);
- rtcache_free(&sc->sc_route);
- }
-#ifdef INET6
- else {
- rtcache_copy(&in6p->in6p_route, &sc->sc_route);
- rtcache_free(&sc->sc_route);
- }
-#endif
-
- if (inp) {
- struct sockaddr_in sin;
- memcpy(&sin, src, src->sa_len);
- if (in_pcbconnect(inp, &sin, &lwp0)) {
- goto resetandabort;
- }
- }
-#ifdef INET6
- else if (in6p) {
- struct sockaddr_in6 sin6;
- memcpy(&sin6, src, src->sa_len);
- if (src->sa_family == AF_INET) {
- /* IPv4 packet to AF_INET6 socket */
- in6_sin_2_v4mapsin6((struct sockaddr_in *)src, &sin6);
- }
- if (in6_pcbconnect(in6p, &sin6, NULL)) {
- goto resetandabort;
- }
- }
-#endif
- else {
- goto resetandabort;
- }
-
- if (inp)
- tp = intotcpcb(inp);
-#ifdef INET6
- else if (in6p)
- tp = in6totcpcb(in6p);
-#endif
- else
- tp = NULL;
-
- tp->t_flags = sototcpcb(oso)->t_flags & TF_NODELAY;
- if (sc->sc_request_r_scale != 15) {
- tp->requested_s_scale = sc->sc_requested_s_scale;
- tp->request_r_scale = sc->sc_request_r_scale;
- tp->snd_scale = sc->sc_requested_s_scale;
- tp->rcv_scale = sc->sc_request_r_scale;
- tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
- }
- if (sc->sc_flags & SCF_TIMESTAMP)
- tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
- tp->ts_timebase = sc->sc_timebase;
-
- tp->t_template = tcp_template(tp);
- if (tp->t_template == 0) {
- tp = tcp_drop(tp, ENOBUFS); /* destroys socket */
- so = NULL;
- m_freem(m);
- goto abort;
- }
-
- tp->iss = sc->sc_iss;
- tp->irs = sc->sc_irs;
- tcp_sendseqinit(tp);
- tcp_rcvseqinit(tp);
- tp->t_state = TCPS_SYN_RECEIVED;
- TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit);
- TCP_STATINC(TCP_STAT_ACCEPTS);
-
- if ((sc->sc_flags & SCF_SACK_PERMIT) && tcp_do_sack)
- tp->t_flags |= TF_WILL_SACK;
-
- if ((sc->sc_flags & SCF_ECN_PERMIT) && tcp_do_ecn)
- tp->t_flags |= TF_ECN_PERMIT;
-
-#ifdef TCP_SIGNATURE
- if (sc->sc_flags & SCF_SIGNATURE)
- tp->t_flags |= TF_SIGNATURE;
-#endif
-
- /* Initialize tp->t_ourmss before we deal with the peer's! */
- tp->t_ourmss = sc->sc_ourmaxseg;
- tcp_mss_from_peer(tp, sc->sc_peermaxseg);
-
- /*
- * Initialize the initial congestion window. If we
- * had to retransmit the SYN,ACK, we must initialize cwnd
- * to 1 segment (i.e. the Loss Window).
- */
- if (sc->sc_rxtshift)
- tp->snd_cwnd = tp->t_peermss;
- else {
- int ss = tcp_init_win;
- if (inp != NULL && in_localaddr(inp->inp_faddr))
- ss = tcp_init_win_local;
-#ifdef INET6
- if (in6p != NULL && in6_localaddr(&in6p->in6p_faddr))
- ss = tcp_init_win_local;
-#endif
- tp->snd_cwnd = TCP_INITIAL_WINDOW(ss, tp->t_peermss);
- }
-
- tcp_rmx_rtt(tp);
- tp->snd_wl1 = sc->sc_irs;
- tp->rcv_up = sc->sc_irs + 1;
-
- /*
- * This is what would have happened in tcp_output() when
- * the SYN,ACK was sent.
- */
- tp->snd_up = tp->snd_una;
- tp->snd_max = tp->snd_nxt = tp->iss+1;
- TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
- if (sc->sc_win > 0 && SEQ_GT(tp->rcv_nxt + sc->sc_win, tp->rcv_adv))
- tp->rcv_adv = tp->rcv_nxt + sc->sc_win;
- tp->last_ack_sent = tp->rcv_nxt;
- tp->t_partialacks = -1;
- tp->t_dupacks = 0;
-
- TCP_STATINC(TCP_STAT_SC_COMPLETED);
- s = splsoftnet();
- syn_cache_put(sc);
- splx(s);
- return so;
-
-resetandabort:
- (void)tcp_respond(NULL, m, m, th, (tcp_seq)0, th->th_ack, TH_RST);
-abort:
- if (so != NULL) {
- (void) soqremque(so, 1);
- (void) soabort(so);
- mutex_enter(softnet_lock);
- }
- s = splsoftnet();
- syn_cache_put(sc);
- splx(s);
- TCP_STATINC(TCP_STAT_SC_ABORTED);
- return ((struct socket *)(-1));
-}
-
-/*
- * This function is called when we get a RST for a
- * non-existent connection, so that we can see if the
- * connection is in the syn cache. If it is, zap it.
- */
-
-void
-syn_cache_reset(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th)
-{
- struct syn_cache *sc;
- struct syn_cache_head *scp;
- int s = splsoftnet();
-
- if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) {
- splx(s);
- return;
- }
- if (SEQ_LT(th->th_seq, sc->sc_irs) ||
- SEQ_GT(th->th_seq, sc->sc_irs+1)) {
- splx(s);
- return;
- }
- syn_cache_rm(sc);
- TCP_STATINC(TCP_STAT_SC_RESET);
- syn_cache_put(sc); /* calls pool_put but see spl above */
- splx(s);
-}
-
-void
-syn_cache_unreach(const struct sockaddr *src, const struct sockaddr *dst,
- struct tcphdr *th)
-{
- struct syn_cache *sc;
- struct syn_cache_head *scp;
- int s;
-
- s = splsoftnet();
- if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) {
- splx(s);
- return;
- }
- /* If the sequence number != sc_iss, then it's a bogus ICMP msg */
- if (ntohl(th->th_seq) != sc->sc_iss) {
- splx(s);
- return;
- }
-
- /*
- * If we've retransmitted 3 times and this is our second error,
- * we remove the entry. Otherwise, we allow it to continue on.
- * This prevents us from incorrectly nuking an entry during a
- * spurious network outage.
- *
- * See tcp_notify().
- */
- if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxtshift < 3) {
- sc->sc_flags |= SCF_UNREACH;
- splx(s);
- return;
- }
-
- syn_cache_rm(sc);
- TCP_STATINC(TCP_STAT_SC_UNREACH);
- syn_cache_put(sc); /* calls pool_put but see spl above */
- splx(s);
-}
-
-/*
- * Given a LISTEN socket and an inbound SYN request, add this to the syn
- * cache, and send back a segment:
- * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
- * to the source.
- *
- * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN.
- * Doing so would require that we hold onto the data and deliver it
- * to the application. However, if we are the target of a SYN-flood
- * DoS attack, an attacker could send data which would eventually
- * consume all available buffer space if it were ACKed. By not ACKing
- * the data, we avoid this DoS scenario.
- */
-int
-syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
- unsigned int toff, struct socket *so, struct mbuf *m, u_char *optp,
- int optlen, struct tcp_opt_info *oi)
-{
- struct tcpcb tb, *tp;
- long win;
- struct syn_cache *sc;
- struct syn_cache_head *scp;
- struct mbuf *ipopts;
- int s;
-
- tp = sototcpcb(so);
-
- /*
- * Initialize some local state.
- */
- win = sbspace(&so->so_rcv);
- if (win > TCP_MAXWIN)
- win = TCP_MAXWIN;
-
-#ifdef TCP_SIGNATURE
- if (optp || (tp->t_flags & TF_SIGNATURE))
-#else
- if (optp)
-#endif
- {
- tb.t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0;
-#ifdef TCP_SIGNATURE
- tb.t_flags |= (tp->t_flags & TF_SIGNATURE);
-#endif
- tb.t_state = TCPS_LISTEN;
- if (tcp_dooptions(&tb, optp, optlen, th, m, toff, oi) < 0)
- return 0;
- } else
- tb.t_flags = 0;
-
- switch (src->sa_family) {
- case AF_INET:
- /* Remember the IP options, if any. */
- ipopts = ip_srcroute(m);
- break;
- default:
- ipopts = NULL;
- }
-
- /*
- * See if we already have an entry for this connection.
- * If we do, resend the SYN,ACK. We do not count this
- * as a retransmission (XXX though maybe we should).
- */
- if ((sc = syn_cache_lookup(src, dst, &scp)) != NULL) {
- TCP_STATINC(TCP_STAT_SC_DUPESYN);
- if (ipopts) {
- /*
- * If we were remembering a previous source route,
- * forget it and use the new one we've been given.
- */
- if (sc->sc_ipopts)
- (void)m_free(sc->sc_ipopts);
- sc->sc_ipopts = ipopts;
- }
- sc->sc_timestamp = tb.ts_recent;
- m_freem(m);
- if (syn_cache_respond(sc) == 0) {
- uint64_t *tcps = TCP_STAT_GETREF();
- tcps[TCP_STAT_SNDACKS]++;
- tcps[TCP_STAT_SNDTOTAL]++;
- TCP_STAT_PUTREF();
- }
- return 1;
- }
-
- s = splsoftnet();
- sc = pool_get(&syn_cache_pool, PR_NOWAIT);
- splx(s);
- if (sc == NULL) {
- if (ipopts)
- (void)m_free(ipopts);
- return 0;
- }
-
- /*
- * Fill in the cache, and put the necessary IP and TCP
- * options into the reply.
- */
- memset(sc, 0, sizeof(struct syn_cache));
- callout_init(&sc->sc_timer, CALLOUT_MPSAFE);
- memcpy(&sc->sc_src, src, src->sa_len);
- memcpy(&sc->sc_dst, dst, dst->sa_len);
- sc->sc_flags = 0;
- sc->sc_ipopts = ipopts;
- sc->sc_irs = th->th_seq;
- switch (src->sa_family) {
- case AF_INET:
- {
- struct sockaddr_in *srcin = (void *)src;
- struct sockaddr_in *dstin = (void *)dst;
-
- sc->sc_iss = tcp_new_iss1(&dstin->sin_addr,
- &srcin->sin_addr, dstin->sin_port,
- srcin->sin_port, sizeof(dstin->sin_addr));
- break;
- }
-#ifdef INET6
- case AF_INET6:
- {
- struct sockaddr_in6 *srcin6 = (void *)src;
- struct sockaddr_in6 *dstin6 = (void *)dst;
-
- sc->sc_iss = tcp_new_iss1(&dstin6->sin6_addr,
- &srcin6->sin6_addr, dstin6->sin6_port,
- srcin6->sin6_port, sizeof(dstin6->sin6_addr));
- break;
- }
-#endif
- }
- sc->sc_peermaxseg = oi->maxseg;
- sc->sc_ourmaxseg = tcp_mss_to_advertise(m->m_flags & M_PKTHDR ?
- m_get_rcvif_NOMPSAFE(m) : NULL, sc->sc_src.sa.sa_family);
- sc->sc_win = win;
- sc->sc_timebase = tcp_now - 1; /* see tcp_newtcpcb() */
- sc->sc_timestamp = tb.ts_recent;
- if ((tb.t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP)) ==
- (TF_REQ_TSTMP|TF_RCVD_TSTMP))
- sc->sc_flags |= SCF_TIMESTAMP;
- if ((tb.t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
- (TF_RCVD_SCALE|TF_REQ_SCALE)) {
- sc->sc_requested_s_scale = tb.requested_s_scale;
- sc->sc_request_r_scale = 0;
- /*
- * Pick the smallest possible scaling factor that
- * will still allow us to scale up to sb_max.
- *
- * We do this because there are broken firewalls that
- * will corrupt the window scale option, leading to
- * the other endpoint believing that our advertised
- * window is unscaled. At scale factors larger than
- * 5 the unscaled window will drop below 1500 bytes,
- * leading to serious problems when traversing these
- * broken firewalls.
- *
- * With the default sbmax of 256K, a scale factor
- * of 3 will be chosen by this algorithm. Those who
- * choose a larger sbmax should watch out
- * for the compatibility problems mentioned above.
- *
- * RFC1323: The Window field in a SYN (i.e., a <SYN>
- * or <SYN,ACK>) segment itself is never scaled.
- */
- while (sc->sc_request_r_scale < TCP_MAX_WINSHIFT &&
- (TCP_MAXWIN << sc->sc_request_r_scale) < sb_max)
- sc->sc_request_r_scale++;
- } else {
- sc->sc_requested_s_scale = 15;
- sc->sc_request_r_scale = 15;
- }
- if ((tb.t_flags & TF_SACK_PERMIT) && tcp_do_sack)
- sc->sc_flags |= SCF_SACK_PERMIT;
-
- /*
- * ECN setup packet received.
- */
- if ((th->th_flags & (TH_ECE|TH_CWR)) && tcp_do_ecn)
- sc->sc_flags |= SCF_ECN_PERMIT;
-
-#ifdef TCP_SIGNATURE
- if (tb.t_flags & TF_SIGNATURE)
- sc->sc_flags |= SCF_SIGNATURE;
-#endif
- sc->sc_tp = tp;
- m_freem(m);
- if (syn_cache_respond(sc) == 0) {
- uint64_t *tcps = TCP_STAT_GETREF();
- tcps[TCP_STAT_SNDACKS]++;
- tcps[TCP_STAT_SNDTOTAL]++;
- TCP_STAT_PUTREF();
- syn_cache_insert(sc, tp);
- } else {
- s = splsoftnet();
- /*
- * syn_cache_put() will try to schedule the timer, so
- * we need to initialize it
- */
- syn_cache_timer_arm(sc);
- syn_cache_put(sc);
- splx(s);
- TCP_STATINC(TCP_STAT_SC_DROPPED);
- }
- return 1;
-}
-
-/*
- * syn_cache_respond: (re)send SYN+ACK.
- *
- * Returns 0 on success.
- */
-
-int
-syn_cache_respond(struct syn_cache *sc)
-{
-#ifdef INET6
- struct rtentry *rt = NULL;
-#endif
- struct route *ro;
- u_int8_t *optp;
- int optlen, error;
- u_int16_t tlen;
- struct ip *ip = NULL;
-#ifdef INET6
- struct ip6_hdr *ip6 = NULL;
-#endif
- struct tcpcb *tp;
- struct tcphdr *th;
- struct mbuf *m;
- u_int hlen;
-#ifdef TCP_SIGNATURE
- struct secasvar *sav = NULL;
- u_int8_t *sigp = NULL;
-#endif
-
- ro = &sc->sc_route;
- switch (sc->sc_src.sa.sa_family) {
- case AF_INET:
- hlen = sizeof(struct ip);
- break;
-#ifdef INET6
- case AF_INET6:
- hlen = sizeof(struct ip6_hdr);
- break;
-#endif
- default:
- return EAFNOSUPPORT;
- }
-
- /* Worst case scenario, since we don't know the option size yet. */
- tlen = hlen + sizeof(struct tcphdr) + MAX_TCPOPTLEN;
- KASSERT(max_linkhdr + tlen <= MCLBYTES);
-
- /*
- * Create the IP+TCP header from scratch.
- */
- MGETHDR(m, M_DONTWAIT, MT_DATA);
- if (m && (max_linkhdr + tlen) > MHLEN) {
- MCLGET(m, M_DONTWAIT);
- if ((m->m_flags & M_EXT) == 0) {
- m_freem(m);
- m = NULL;
- }
- }
- if (m == NULL)
- return ENOBUFS;
- MCLAIM(m, &tcp_tx_mowner);
-
- tp = sc->sc_tp;
-
- /* Fixup the mbuf. */
- m->m_data += max_linkhdr;
- m_reset_rcvif(m);
- memset(mtod(m, void *), 0, tlen);
-
- switch (sc->sc_src.sa.sa_family) {
- case AF_INET:
- ip = mtod(m, struct ip *);
- ip->ip_v = 4;
- ip->ip_dst = sc->sc_src.sin.sin_addr;
- ip->ip_src = sc->sc_dst.sin.sin_addr;
- ip->ip_p = IPPROTO_TCP;
- th = (struct tcphdr *)(ip + 1);
- th->th_dport = sc->sc_src.sin.sin_port;
- th->th_sport = sc->sc_dst.sin.sin_port;
- break;
-#ifdef INET6
- case AF_INET6:
- ip6 = mtod(m, struct ip6_hdr *);
- ip6->ip6_vfc = IPV6_VERSION;
- ip6->ip6_dst = sc->sc_src.sin6.sin6_addr;
- ip6->ip6_src = sc->sc_dst.sin6.sin6_addr;
- ip6->ip6_nxt = IPPROTO_TCP;
- /* ip6_plen will be updated in ip6_output() */
- th = (struct tcphdr *)(ip6 + 1);
- th->th_dport = sc->sc_src.sin6.sin6_port;
- th->th_sport = sc->sc_dst.sin6.sin6_port;
- break;
-#endif
- default:
- panic("%s: impossible (1)", __func__);
- }
-
- th->th_seq = htonl(sc->sc_iss);
- th->th_ack = htonl(sc->sc_irs + 1);
- th->th_flags = TH_SYN|TH_ACK;
- th->th_win = htons(sc->sc_win);
- /* th_x2, th_sum, th_urp already 0 from memset */
-
- /* Tack on the TCP options. */
- optp = (u_int8_t *)(th + 1);
- optlen = 0;
- *optp++ = TCPOPT_MAXSEG;
- *optp++ = TCPOLEN_MAXSEG;
- *optp++ = (sc->sc_ourmaxseg >> 8) & 0xff;
- *optp++ = sc->sc_ourmaxseg & 0xff;
- optlen += TCPOLEN_MAXSEG;
-
- if (sc->sc_request_r_scale != 15) {
- *((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 |
- TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 |
- sc->sc_request_r_scale);
- optp += TCPOLEN_WINDOW + TCPOLEN_NOP;
- optlen += TCPOLEN_WINDOW + TCPOLEN_NOP;
- }
-
- if (sc->sc_flags & SCF_SACK_PERMIT) {
- /* Let the peer know that we will SACK. */
- *optp++ = TCPOPT_SACK_PERMITTED;
- *optp++ = TCPOLEN_SACK_PERMITTED;
- optlen += TCPOLEN_SACK_PERMITTED;
- }
-
- if (sc->sc_flags & SCF_TIMESTAMP) {
- while (optlen % 4 != 2) {
- optlen += TCPOLEN_NOP;
- *optp++ = TCPOPT_NOP;
- }
- *optp++ = TCPOPT_TIMESTAMP;
- *optp++ = TCPOLEN_TIMESTAMP;
- u_int32_t *lp = (u_int32_t *)(optp);
- /* Form timestamp option as shown in appendix A of RFC 1323. */
- *lp++ = htonl(SYN_CACHE_TIMESTAMP(sc));
- *lp = htonl(sc->sc_timestamp);
- optp += TCPOLEN_TIMESTAMP - 2;
- optlen += TCPOLEN_TIMESTAMP;
- }
-
-#ifdef TCP_SIGNATURE
- if (sc->sc_flags & SCF_SIGNATURE) {
- sav = tcp_signature_getsav(m);
- if (sav == NULL) {
- m_freem(m);
- return EPERM;
- }
-
- *optp++ = TCPOPT_SIGNATURE;
- *optp++ = TCPOLEN_SIGNATURE;
- sigp = optp;
- memset(optp, 0, TCP_SIGLEN);
- optp += TCP_SIGLEN;
- optlen += TCPOLEN_SIGNATURE;
- }
-#endif
-
- /*
- * Terminate and pad TCP options to a 4 byte boundary.
- *
- * According to RFC793: "The content of the header beyond the
- * End-of-Option option must be header padding (i.e., zero)."
- * And later: "The padding is composed of zeros."
- */
- if (optlen % 4) {
- optlen += TCPOLEN_EOL;
- *optp++ = TCPOPT_EOL;
- }
- while (optlen % 4) {
- optlen += TCPOLEN_PAD;
- *optp++ = TCPOPT_PAD;
- }
-
- /* Compute the actual values now that we've added the options. */
- tlen = hlen + sizeof(struct tcphdr) + optlen;
- m->m_len = m->m_pkthdr.len = tlen;
- th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
-
-#ifdef TCP_SIGNATURE
- if (sav) {
- (void)tcp_signature(m, th, hlen, sav, sigp);
- key_sa_recordxfer(sav, m);
- KEY_SA_UNREF(&sav);
- }
-#endif
-
- /*
- * Send ECN SYN-ACK setup packet.
- * Routes can be asymmetric, so, even if we receive a packet
- * with ECE and CWR set, we must not assume no one will block
- * the ECE packet we are about to send.
- */
- if ((sc->sc_flags & SCF_ECN_PERMIT) && tp &&
- SEQ_GEQ(tp->snd_nxt, tp->snd_max)) {
- th->th_flags |= TH_ECE;
- TCP_STATINC(TCP_STAT_ECN_SHS);
-
- /*
- * draft-ietf-tcpm-ecnsyn-00.txt
- *
- * "[...] a TCP node MAY respond to an ECN-setup
- * SYN packet by setting ECT in the responding
- * ECN-setup SYN/ACK packet, indicating to routers
- * that the SYN/ACK packet is ECN-Capable.
- * This allows a congested router along the path
- * to mark the packet instead of dropping the
- * packet as an indication of congestion."
- *
- * "[...] There can be a great benefit in setting
- * an ECN-capable codepoint in SYN/ACK packets [...]
- * Congestion is most likely to occur in
- * the server-to-client direction. As a result,
- * setting an ECN-capable codepoint in SYN/ACK
- * packets can reduce the occurrence of three-second
- * retransmit timeouts resulting from the drop
- * of SYN/ACK packets."
- *
- * Page 4 and 6, January 2006.
- */
-
- switch (sc->sc_src.sa.sa_family) {
- case AF_INET:
- ip->ip_tos |= IPTOS_ECN_ECT0;
- break;
-#ifdef INET6
- case AF_INET6:
- ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
- break;
-#endif
- }
- TCP_STATINC(TCP_STAT_ECN_ECT);
- }
-
-
- /*
- * Compute the packet's checksum.
- *
- * Fill in some straggling IP bits. Note the stack expects
- * ip_len to be in host order, for convenience.
- */
- switch (sc->sc_src.sa.sa_family) {
- case AF_INET:
- ip->ip_len = htons(tlen - hlen);
- th->th_sum = 0;
- th->th_sum = in4_cksum(m, IPPROTO_TCP, hlen, tlen - hlen);
- ip->ip_len = htons(tlen);
- ip->ip_ttl = ip_defttl;
- /* XXX tos? */
- break;
-#ifdef INET6
- case AF_INET6:
- ip6->ip6_plen = htons(tlen - hlen);
- th->th_sum = 0;
- th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen, tlen - hlen);
- ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
- ip6->ip6_vfc |= IPV6_VERSION;
- ip6->ip6_plen = htons(tlen - hlen);
- /* ip6_hlim will be initialized afterwards */
- /* XXX flowlabel? */
- break;
-#endif
- }
-
- /* XXX use IPsec policy on listening socket, on SYN ACK */
- tp = sc->sc_tp;
-
- switch (sc->sc_src.sa.sa_family) {
- case AF_INET:
- error = ip_output(m, sc->sc_ipopts, ro,
- (ip_mtudisc ? IP_MTUDISC : 0),
- NULL, tp ? tp->t_inpcb : NULL);
- break;
-#ifdef INET6
- case AF_INET6:
- ip6->ip6_hlim = in6_selecthlim(NULL,
- (rt = rtcache_validate(ro)) != NULL ? rt->rt_ifp : NULL);
- rtcache_unref(rt, ro);
-
- error = ip6_output(m, NULL /*XXX*/, ro, 0, NULL,
- tp ? tp->t_in6pcb : NULL, NULL);
- break;
-#endif
- default:
- panic("%s: impossible (2)", __func__);
- }
-
- return error;
-}
Index: src/sys/netinet/tcp_subr.c
diff -u src/sys/netinet/tcp_subr.c:1.290 src/sys/netinet/tcp_subr.c:1.291
--- src/sys/netinet/tcp_subr.c:1.290 Mon Jun 27 01:29:51 2022
+++ src/sys/netinet/tcp_subr.c Tue Sep 20 07:19:14 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: tcp_subr.c,v 1.290 2022/06/27 01:29:51 knakahara Exp $ */
+/* $NetBSD: tcp_subr.c,v 1.291 2022/09/20 07:19:14 ozaki-r Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -91,7 +91,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.290 2022/06/27 01:29:51 knakahara Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.291 2022/09/20 07:19:14 ozaki-r Exp $");
#ifdef _KERNEL_OPT
#include "opt_inet.h"
@@ -143,6 +143,7 @@ __KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v
#include <netinet/tcp_vtw.h>
#include <netinet/tcp_private.h>
#include <netinet/tcp_congctl.h>
+#include <netinet/tcp_syncache.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -222,14 +223,6 @@ int tcp_vtw_entries = 1 << 4; /* 16 vest
#endif
int tcbhashsize = TCBHASHSIZE;
-/* syn hash parameters */
-#define TCP_SYN_HASH_SIZE 293
-#define TCP_SYN_BUCKET_SIZE 35
-int tcp_syn_cache_size = TCP_SYN_HASH_SIZE;
-int tcp_syn_cache_limit = TCP_SYN_HASH_SIZE*TCP_SYN_BUCKET_SIZE;
-int tcp_syn_bucket_limit = 3*TCP_SYN_BUCKET_SIZE;
-struct syn_cache_head tcp_syn_cache[TCP_SYN_HASH_SIZE];
-
int tcp_freeq(struct tcpcb *);
static int tcp_iss_secret_init(void);
Index: src/sys/netinet/tcp_usrreq.c
diff -u src/sys/netinet/tcp_usrreq.c:1.231 src/sys/netinet/tcp_usrreq.c:1.232
--- src/sys/netinet/tcp_usrreq.c:1.231 Tue Jun 28 01:44:19 2022
+++ src/sys/netinet/tcp_usrreq.c Tue Sep 20 07:19:14 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: tcp_usrreq.c,v 1.231 2022/06/28 01:44:19 riastradh Exp $ */
+/* $NetBSD: tcp_usrreq.c,v 1.232 2022/09/20 07:19:14 ozaki-r Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -99,7 +99,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.231 2022/06/28 01:44:19 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.232 2022/09/20 07:19:14 ozaki-r Exp $");
#ifdef _KERNEL_OPT
#include "opt_inet.h"
@@ -151,6 +151,7 @@ __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c
#include <netinet/tcp_congctl.h>
#include <netinet/tcp_debug.h>
#include <netinet/tcp_vtw.h>
+#include <netinet/tcp_syncache.h>
static int
tcp_debug_capture(struct tcpcb *tp, int req)
Index: src/sys/netinet/tcp_var.h
diff -u src/sys/netinet/tcp_var.h:1.196 src/sys/netinet/tcp_var.h:1.197
--- src/sys/netinet/tcp_var.h:1.196 Sat Jul 31 20:29:37 2021
+++ src/sys/netinet/tcp_var.h Tue Sep 20 07:19:14 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: tcp_var.h,v 1.196 2021/07/31 20:29:37 andvar Exp $ */
+/* $NetBSD: tcp_var.h,v 1.197 2022/09/20 07:19:14 ozaki-r Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -205,6 +205,8 @@ struct sackhole {
TAILQ_ENTRY(sackhole) sackhole_q;
};
+struct syn_cache;
+
/*
* Tcp control block, one per tcp; fields:
*/
@@ -520,57 +522,6 @@ struct tcp_opt_info {
#define TOF_SIGNATURE 0x0040 /* signature option present */
#define TOF_SIGLEN 0x0080 /* sigature length valid (RFC2385) */
-/*
- * Data for the TCP compressed state engine.
- */
-union syn_cache_sa {
- struct sockaddr sa;
- struct sockaddr_in sin;
-#if 1 /*def INET6*/
- struct sockaddr_in6 sin6;
-#endif
-};
-
-struct syn_cache {
- TAILQ_ENTRY(syn_cache) sc_bucketq; /* link on bucket list */
- callout_t sc_timer; /* rexmt timer */
- struct route sc_route;
- long sc_win; /* advertised window */
- int sc_bucketidx; /* our bucket index */
- u_int32_t sc_hash;
- u_int32_t sc_timestamp; /* timestamp from SYN */
- u_int32_t sc_timebase; /* our local timebase */
- union syn_cache_sa sc_src;
- union syn_cache_sa sc_dst;
- tcp_seq sc_irs;
- tcp_seq sc_iss;
- u_int sc_rxtcur; /* current rxt timeout */
- u_int sc_rxttot; /* total time spend on queues */
- u_short sc_rxtshift; /* for computing backoff */
- u_short sc_flags;
-
-#define SCF_UNREACH 0x0001 /* we've had an unreach error */
-#define SCF_TIMESTAMP 0x0002 /* peer will do timestamps */
-#define SCF_DEAD 0x0004 /* this entry to be released */
-#define SCF_SACK_PERMIT 0x0008 /* peer will do SACK */
-#define SCF_ECN_PERMIT 0x0010 /* peer will do ECN */
-#define SCF_SIGNATURE 0x40 /* send MD5 digests */
-
- struct mbuf *sc_ipopts; /* IP options */
- u_int16_t sc_peermaxseg;
- u_int16_t sc_ourmaxseg;
- u_int8_t sc_request_r_scale : 4,
- sc_requested_s_scale : 4;
-
- struct tcpcb *sc_tp; /* tcb for listening socket */
- LIST_ENTRY(syn_cache) sc_tpq; /* list of entries by same tp */
-};
-
-struct syn_cache_head {
- TAILQ_HEAD(, syn_cache) sch_bucket; /* bucket entries */
- u_short sch_length; /* # entries in bucket */
-};
-
#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb)
#ifdef INET6
#define in6totcpcb(ip) ((struct tcpcb *)(ip)->in6p_ppcb)
@@ -803,8 +754,6 @@ extern int tcp_mss_ifmtu; /* take MSS fr
extern int tcp_cwm; /* enable Congestion Window Monitoring */
extern int tcp_cwm_burstsize; /* burst size allowed by CWM */
extern int tcp_ack_on_push; /* ACK immediately on PUSH */
-extern int tcp_syn_cache_limit; /* max entries for compressed state engine */
-extern int tcp_syn_bucket_limit;/* max entries per hash bucket */
extern int tcp_log_refused; /* log refused connections */
extern int tcp_do_ecn; /* TCP ECN enabled/disabled? */
extern int tcp_ecn_maxretries; /* Max ECN setup retries */
@@ -829,10 +778,6 @@ extern int tcp_vtw_entries;
extern int tcp_rst_ppslim;
extern int tcp_ackdrop_ppslim;
-extern int tcp_syn_cache_size;
-extern struct syn_cache_head tcp_syn_cache[];
-extern u_long syn_cache_count;
-
#ifdef MBUFTRACE
extern struct mowner tcp_rx_mowner;
extern struct mowner tcp_tx_mowner;
@@ -940,24 +885,11 @@ int tcp_sack_numblks(const struct tcpcb
void tcp_statinc(u_int);
void tcp_statadd(u_int, uint64_t);
-int syn_cache_add(struct sockaddr *, struct sockaddr *,
- struct tcphdr *, unsigned int, struct socket *,
- struct mbuf *, u_char *, int, struct tcp_opt_info *);
-void syn_cache_unreach(const struct sockaddr *, const struct sockaddr *,
- struct tcphdr *);
-struct socket *syn_cache_get(struct sockaddr *, struct sockaddr *,
- struct tcphdr *, struct socket *so, struct mbuf *);
-void syn_cache_init(void);
-void syn_cache_insert(struct syn_cache *, struct tcpcb *);
-struct syn_cache *syn_cache_lookup(const struct sockaddr *, const struct sockaddr *,
- struct syn_cache_head **);
-void syn_cache_reset(struct sockaddr *, struct sockaddr *,
- struct tcphdr *);
-int syn_cache_respond(struct syn_cache *);
-void syn_cache_cleanup(struct tcpcb *);
-
int tcp_input_checksum(int, struct mbuf *, const struct tcphdr *, int, int,
int);
+
+int tcp_dooptions(struct tcpcb *, const u_char *, int,
+ struct tcphdr *, struct mbuf *, int, struct tcp_opt_info *);
#endif
#endif /* !_NETINET_TCP_VAR_H_ */
Index: src/sys/rump/net/lib/libnetinet/Makefile.inc
diff -u src/sys/rump/net/lib/libnetinet/Makefile.inc:1.15 src/sys/rump/net/lib/libnetinet/Makefile.inc:1.16
--- src/sys/rump/net/lib/libnetinet/Makefile.inc:1.15 Mon Mar 8 20:43:22 2021
+++ src/sys/rump/net/lib/libnetinet/Makefile.inc Tue Sep 20 07:19:14 2022
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile.inc,v 1.15 2021/03/08 20:43:22 christos Exp $
+# $NetBSD: Makefile.inc,v 1.16 2022/09/20 07:19:14 ozaki-r Exp $
#
.PATH: ${.CURDIR}/../../../../netinet
@@ -15,7 +15,7 @@ SRCS+= if_arp.c
# TCP
SRCS+= tcp_congctl.c tcp_input.c tcp_output.c tcp_sack.c tcp_subr.c \
- tcp_timer.c tcp_usrreq.c tcp_vtw.c
+ tcp_syncache.c tcp_timer.c tcp_usrreq.c tcp_vtw.c
# UDP
SRCS+= udp_usrreq.c
Added files:
Index: src/sys/netinet/tcp_syncache.c
diff -u /dev/null src/sys/netinet/tcp_syncache.c:1.1
--- /dev/null Tue Sep 20 07:19:15 2022
+++ src/sys/netinet/tcp_syncache.c Tue Sep 20 07:19:14 2022
@@ -0,0 +1,1380 @@
+/* $NetBSD: tcp_syncache.c,v 1.1 2022/09/20 07:19:14 ozaki-r Exp $ */
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
+ *
+ * NRL grants permission for redistribution and use in source and binary
+ * forms, with or without modification, of the software and documentation
+ * created at NRL provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgements:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * This product includes software developed at the Information
+ * Technology Division, US Naval Research Laboratory.
+ * 4. Neither the name of the NRL nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
+ * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation
+ * are those of the authors and should not be interpreted as representing
+ * official policies, either expressed or implied, of the US Naval
+ * Research Laboratory (NRL).
+ */
+
+/*-
+ * Copyright (c) 1997, 1998, 1999, 2001, 2005, 2006,
+ * 2011 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Coyote Point Systems, Inc.
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
+ * Facility, NASA Ames Research Center.
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Charles M. Hannum.
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Rui Paulo.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
+ */
+
+/*
+ * TODO list for SYN cache stuff:
+ *
+ * Find room for a "state" field, which is needed to keep a
+ * compressed state for TIME_WAIT TCBs. It's been noted already
+ * that this is fairly important for very high-volume web and
+ * mail servers, which use a large number of short-lived
+ * connections.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: tcp_syncache.c,v 1.1 2022/09/20 07:19:14 ozaki-r Exp $");
+
+#ifdef _KERNEL_OPT
+#include "opt_inet.h"
+#include "opt_ipsec.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+#include <sys/syslog.h>
+#include <sys/pool.h>
+#include <sys/domain.h>
+#include <sys/kernel.h>
+#include <sys/lwp.h> /* for lwp0 */
+#include <sys/cprng.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+
+#include <netinet/ip6.h>
+#ifdef INET6
+#include <netinet6/ip6_var.h>
+#include <netinet6/in6_pcb.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/in6_var.h>
+#endif
+
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_private.h>
+#include <netinet/tcp_syncache.h>
+
+#ifdef TCP_SIGNATURE
+#ifdef IPSEC
+#include <netipsec/ipsec.h>
+#include <netipsec/key.h>
+#ifdef INET6
+#include <netipsec/ipsec6.h>
+#endif
+#endif /* IPSEC*/
+#endif
+
+static void syn_cache_timer(void *);
+
+/*
+ * syn hash parameters
+ *
+ * TCP_SYN_HASH_SIZE is the number of hash buckets in tcp_syn_cache[];
+ * TCP_SYN_BUCKET_SIZE scales the default entry limits defined below.
+ */
+#define TCP_SYN_HASH_SIZE 293
+#define TCP_SYN_BUCKET_SIZE 35
+static int tcp_syn_cache_size = TCP_SYN_HASH_SIZE; /* bucket count; hashes are reduced modulo this */
+int tcp_syn_cache_limit = TCP_SYN_HASH_SIZE*TCP_SYN_BUCKET_SIZE; /* max entries in the whole cache */
+int tcp_syn_bucket_limit = 3*TCP_SYN_BUCKET_SIZE; /* max entries in a single bucket */
+static struct syn_cache_head tcp_syn_cache[TCP_SYN_HASH_SIZE]; /* the hash table itself */
+
+/*
+ * TCP compressed state engine. Currently used to hold compressed
+ * state for SYN_RECEIVED.
+ */
+
+u_long syn_cache_count; /* entries currently linked into tcp_syn_cache[] */
+static u_int32_t syn_hash1, syn_hash2; /* hash secrets; re-randomized by syn_cache_insert() while the cache is empty */
+/*
+ * Hash helpers.
+ *
+ * SYN_HASH xors an IPv4 address with syn_hash1, combines the two port
+ * arguments as (dp << 16) + sp xored with syn_hash2, and multiplies the
+ * two values. SYN_HASH6 does the same using the xor of the first and
+ * last 32-bit words of an IPv6 address, and masks the product to 31 bits.
+ *
+ * SYN_HASHALL(hash, src, dst) stores the hash for a src/dst sockaddr
+ * pair in `hash'. Only the source address, the source port and the
+ * destination port are hashed; the destination address is not.
+ * With INET6 it dispatches on (src)->sa_family and yields 0 for any
+ * family other than AF_INET/AF_INET6; without INET6 both arguments are
+ * treated as struct sockaddr_in unconditionally.
+ */
+#define SYN_HASH(sa, sp, dp) \
+ ((((sa)->s_addr^syn_hash1)*(((((u_int32_t)(dp))<<16) + \
+ ((u_int32_t)(sp)))^syn_hash2)))
+#ifndef INET6
+#define SYN_HASHALL(hash, src, dst) \
+do { \
+ hash = SYN_HASH(&((const struct sockaddr_in *)(src))->sin_addr, \
+ ((const struct sockaddr_in *)(src))->sin_port, \
+ ((const struct sockaddr_in *)(dst))->sin_port); \
+} while (/*CONSTCOND*/ 0)
+#else /* INET6 */
+#define SYN_HASH6(sa, sp, dp) \
+ ((((sa)->s6_addr32[0] ^ (sa)->s6_addr32[3] ^ syn_hash1) * \
+ (((((u_int32_t)(dp))<<16) + ((u_int32_t)(sp)))^syn_hash2)) \
+ & 0x7fffffff)
+
+#define SYN_HASHALL(hash, src, dst) \
+do { \
+ switch ((src)->sa_family) { \
+ case AF_INET: \
+ hash = SYN_HASH(&((const struct sockaddr_in *)(src))->sin_addr, \
+ ((const struct sockaddr_in *)(src))->sin_port, \
+ ((const struct sockaddr_in *)(dst))->sin_port); \
+ break; \
+ case AF_INET6: \
+ hash = SYN_HASH6(&((const struct sockaddr_in6 *)(src))->sin6_addr, \
+ ((const struct sockaddr_in6 *)(src))->sin6_port, \
+ ((const struct sockaddr_in6 *)(dst))->sin6_port); \
+ break; \
+ default: \
+ hash = 0; \
+ } \
+} while (/*CONSTCOND*/0)
+#endif /* INET6 */
+
+static struct pool syn_cache_pool; /* pool of struct syn_cache; set up in syn_cache_init(), entries returned in syn_cache_timer() */
+
+/*
+ * (Re)arm the retransmit timer of a syn cache entry.
+ *
+ * We don't estimate RTT with SYNs, so each packet starts with the default
+ * RTT and each timer step has a fixed timeout value.
+ *
+ * sc_rxtcur is computed from TCPTV_SRTTDFLT scaled by the tcp_backoff[]
+ * entry for the current sc_rxtshift, with TCPTV_MIN and TCPTV_REXMTMAX
+ * handed to TCPT_RANGESET as bounds (presumably clamping; the macro is
+ * defined elsewhere -- confirm). The callout is then reset so that
+ * syn_cache_timer(sc) runs after sc_rxtcur * (hz / PR_SLOWHZ) ticks.
+ */
+static inline void
+syn_cache_timer_arm(struct syn_cache *sc)
+{
+
+ TCPT_RANGESET(sc->sc_rxtcur,
+ TCPTV_SRTTDFLT * tcp_backoff[sc->sc_rxtshift], TCPTV_MIN,
+ TCPTV_REXMTMAX);
+ callout_reset(&sc->sc_timer,
+ sc->sc_rxtcur * (hz / PR_SLOWHZ), syn_cache_timer, sc);
+}
+
+#define SYN_CACHE_TIMESTAMP(sc) (tcp_now - (sc)->sc_timebase) /* tcp_now relative to the entry's own sc_timebase */
+/*
+ * Unlink an entry from the cache without releasing it.
+ *
+ * Removes sc from its hash bucket and from the list hanging off the
+ * owning tcpcb, clears sc_tp, stops the entry's callout and adjusts the
+ * per-bucket and global entry counts. The entry's memory, route and
+ * IP options are left untouched; see syn_cache_put() for the release.
+ */
+static inline void
+syn_cache_rm(struct syn_cache *sc)
+{
+ TAILQ_REMOVE(&tcp_syn_cache[sc->sc_bucketidx].sch_bucket,
+ sc, sc_bucketq);
+ sc->sc_tp = NULL;
+ LIST_REMOVE(sc, sc_tpq);
+ tcp_syn_cache[sc->sc_bucketidx].sch_length--;
+ callout_stop(&sc->sc_timer);
+ syn_cache_count--;
+}
+/*
+ * Release the resources of an entry and arrange for it to be freed.
+ *
+ * Frees the saved IP options mbuf (if any) and the cached route, then
+ * marks the entry SCF_DEAD. The entry itself is not returned to the
+ * pool here: unless the callout is currently being invoked, it is
+ * scheduled to fire after 1 tick, and syn_cache_timer() destroys the
+ * callout and does the pool_put when it sees SCF_DEAD.
+ */
+static inline void
+syn_cache_put(struct syn_cache *sc)
+{
+ if (sc->sc_ipopts)
+ (void) m_free(sc->sc_ipopts);
+ rtcache_free(&sc->sc_route);
+ sc->sc_flags |= SCF_DEAD;
+ if (!callout_invoking(&sc->sc_timer))
+ callout_schedule(&(sc)->sc_timer, 1);
+}
+/*
+ * One-time setup of the syn cache: initialize the pool that backs
+ * struct syn_cache and the queue head of every hash bucket.
+ */
+void
+syn_cache_init(void)
+{
+ int i;
+
+ pool_init(&syn_cache_pool, sizeof(struct syn_cache), 0, 0, 0,
+ "synpl", NULL, IPL_SOFTNET);
+
+ /* Initialize the hash buckets. */
+ for (i = 0; i < tcp_syn_cache_size; i++)
+ TAILQ_INIT(&tcp_syn_cache[i].sch_bucket);
+}
+/*
+ * Insert entry sc into the syn cache and link it to tcpcb tp.
+ *
+ * The hash is computed from sc_src/sc_dst and stored in sc_hash and
+ * sc_bucketidx. If the target bucket is at tcp_syn_bucket_limit its
+ * oldest (first) entry is evicted; otherwise, if the whole cache is at
+ * tcp_syn_cache_limit, the first entry of the first non-empty bucket
+ * found starting from the target bucket is evicted. The retransmit
+ * state is then reset, the timer armed, and the entry appended to the
+ * tail of its bucket and put at the head of tp->t_sc.
+ *
+ * sc_tp is not assigned here.
+ */
+void
+syn_cache_insert(struct syn_cache *sc, struct tcpcb *tp)
+{
+ struct syn_cache_head *scp;
+ struct syn_cache *sc2;
+ int s;
+
+ /*
+ * If there are no entries in the hash table, reinitialize
+ * the hash secrets.
+ */
+ if (syn_cache_count == 0) {
+ syn_hash1 = cprng_fast32();
+ syn_hash2 = cprng_fast32();
+ }
+
+ SYN_HASHALL(sc->sc_hash, &sc->sc_src.sa, &sc->sc_dst.sa);
+ sc->sc_bucketidx = sc->sc_hash % tcp_syn_cache_size;
+ scp = &tcp_syn_cache[sc->sc_bucketidx];
+
+ /*
+ * Make sure that we don't overflow the per-bucket
+ * limit or the total cache size limit.
+ */
+ s = splsoftnet();
+ if (scp->sch_length >= tcp_syn_bucket_limit) {
+ TCP_STATINC(TCP_STAT_SC_BUCKETOVERFLOW);
+ /*
+ * The bucket is full. Toss the oldest element in the
+ * bucket. This will be the first entry in the bucket.
+ */
+ sc2 = TAILQ_FIRST(&scp->sch_bucket);
+#ifdef DIAGNOSTIC
+ /*
+ * This should never happen; we should always find an
+ * entry in our bucket.
+ */
+ if (sc2 == NULL)
+ panic("syn_cache_insert: bucketoverflow: impossible");
+#endif
+ syn_cache_rm(sc2);
+ syn_cache_put(sc2); /* marks it dead; syn_cache_timer() does the pool_put */
+ } else if (syn_cache_count >= tcp_syn_cache_limit) {
+ struct syn_cache_head *scp2, *sce;
+
+ TCP_STATINC(TCP_STAT_SC_OVERFLOWED);
+ /*
+ * The cache is full. Toss the oldest entry in the
+ * first non-empty bucket we can find.
+ *
+ * XXX We would really like to toss the oldest
+ * entry in the cache, but we hope that this
+ * condition doesn't happen very often.
+ */
+ scp2 = scp;
+ if (TAILQ_EMPTY(&scp2->sch_bucket)) {
+ sce = &tcp_syn_cache[tcp_syn_cache_size]; /* one past the last bucket */
+ for (++scp2; scp2 != scp; scp2++) {
+ if (scp2 >= sce)
+ scp2 = &tcp_syn_cache[0]; /* wrap around to the first bucket */
+ if (! TAILQ_EMPTY(&scp2->sch_bucket))
+ break;
+ }
+#ifdef DIAGNOSTIC
+ /*
+ * This should never happen; we should always find a
+ * non-empty bucket.
+ */
+ if (scp2 == scp)
+ panic("syn_cache_insert: cacheoverflow: "
+ "impossible");
+#endif
+ }
+ sc2 = TAILQ_FIRST(&scp2->sch_bucket);
+ syn_cache_rm(sc2);
+ syn_cache_put(sc2); /* marks it dead; syn_cache_timer() does the pool_put */
+ }
+
+ /*
+ * Initialize the entry's timer.
+ */
+ sc->sc_rxttot = 0;
+ sc->sc_rxtshift = 0;
+ syn_cache_timer_arm(sc);
+
+ /* Link it from tcpcb entry */
+ LIST_INSERT_HEAD(&tp->t_sc, sc, sc_tpq);
+
+ /* Put it into the bucket. */
+ TAILQ_INSERT_TAIL(&scp->sch_bucket, sc, sc_bucketq);
+ scp->sch_length++;
+ syn_cache_count++;
+
+ TCP_STATINC(TCP_STAT_SC_ADDED);
+ splx(s);
+}
+
+/*
+ * Callout handler for a single syn cache entry (arg is the entry).
+ *
+ * Runs with softnet_lock and the kernel lock held for its duration.
+ * - If the entry is marked SCF_DEAD (see syn_cache_put()), it only
+ * destroys the callout and returns the entry to the pool.
+ * - If we have retransmitted the entry the maximum number of times
+ * (sc_rxtshift == TCP_MAXRXTSHIFT), or its accumulated queue time
+ * reaches MIN(tcp_keepinit, TCP_TIMER_MAXTICKS), the entry is
+ * expired: unlinked, its IP options and route released, and freed.
+ * - Otherwise the SYN,ACK is retransmitted via syn_cache_respond(),
+ * the back-off shift is advanced and the timer re-armed.
+ */
+static void
+syn_cache_timer(void *arg)
+{
+ struct syn_cache *sc = arg;
+
+ mutex_enter(softnet_lock);
+ KERNEL_LOCK(1, NULL);
+
+ callout_ack(&sc->sc_timer);
+
+ if (__predict_false(sc->sc_flags & SCF_DEAD)) {
+ TCP_STATINC(TCP_STAT_SC_DELAYED_FREE);
+ goto free; /* already unlinked and stripped by syn_cache_rm()/syn_cache_put() */
+ }
+
+ if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) {
+ /* Drop it -- too many retransmissions. */
+ goto dropit;
+ }
+
+ /*
+ * Compute the total amount of time this entry has
+ * been on a queue. If this entry has been on longer
+ * than the keep alive timer would allow, expire it.
+ */
+ sc->sc_rxttot += sc->sc_rxtcur;
+ if (sc->sc_rxttot >= MIN(tcp_keepinit, TCP_TIMER_MAXTICKS))
+ goto dropit;
+
+ TCP_STATINC(TCP_STAT_SC_RETRANSMITTED);
+ (void)syn_cache_respond(sc); /* result deliberately ignored; the timer is re-armed regardless */
+
+ /* Advance the timer back-off. */
+ sc->sc_rxtshift++;
+ syn_cache_timer_arm(sc);
+
+ goto out;
+
+ dropit:
+ TCP_STATINC(TCP_STAT_SC_TIMED_OUT);
+ syn_cache_rm(sc);
+ if (sc->sc_ipopts)
+ (void) m_free(sc->sc_ipopts);
+ rtcache_free(&sc->sc_route);
+
+ free:
+ callout_destroy(&sc->sc_timer);
+ pool_put(&syn_cache_pool, sc);
+
+ out:
+ KERNEL_UNLOCK_ONE(NULL);
+ mutex_exit(softnet_lock);
+}
+
+/*
+ * Remove syn cache created by the specified tcb entry,
+ * because this does not make sense to keep them
+ * (if there's no tcb entry, syn cache entry will never be used)
+ *
+ * Every entry on tp->t_sc is unlinked with syn_cache_rm() and released
+ * with syn_cache_put(); the list head is re-initialized afterwards.
+ * The whole walk runs at splsoftnet.
+ */
+void
+syn_cache_cleanup(struct tcpcb *tp)
+{
+ struct syn_cache *sc, *nsc;
+ int s;
+
+ s = splsoftnet();
+
+ for (sc = LIST_FIRST(&tp->t_sc); sc != NULL; sc = nsc) {
+ nsc = LIST_NEXT(sc, sc_tpq); /* fetch the successor first: syn_cache_rm() unlinks sc from this list */
+
+#ifdef DIAGNOSTIC
+ if (sc->sc_tp != tp)
+ panic("invalid sc_tp in syn_cache_cleanup");
+#endif
+ syn_cache_rm(sc);
+ syn_cache_put(sc); /* marks it dead; syn_cache_timer() does the pool_put */
+ }
+ /* just for safety */
+ LIST_INIT(&tp->t_sc);
+
+ splx(s);
+}
+
+/*
+ * Find an entry in the syn cache.
+ *
+ * The bucket selected by the hash of src/dst is always stored in
+ * *headp, whether or not an entry is found. Within the bucket an
+ * entry matches when its stored hash equals the computed one and both
+ * sc_src and sc_dst compare equal to src and dst over their sa_len
+ * bytes. Returns the matching entry, or NULL if there is none. The
+ * entry is not removed from the cache.
+ */
+struct syn_cache *
+syn_cache_lookup(const struct sockaddr *src, const struct sockaddr *dst,
+ struct syn_cache_head **headp)
+{
+ struct syn_cache *sc;
+ struct syn_cache_head *scp;
+ u_int32_t hash;
+ int s;
+
+ SYN_HASHALL(hash, src, dst);
+
+ scp = &tcp_syn_cache[hash % tcp_syn_cache_size];
+ *headp = scp;
+ s = splsoftnet();
+ for (sc = TAILQ_FIRST(&scp->sch_bucket); sc != NULL;
+ sc = TAILQ_NEXT(sc, sc_bucketq)) {
+ if (sc->sc_hash != hash) /* cheap reject before the address compare */
+ continue;
+ if (!memcmp(&sc->sc_src, src, src->sa_len) &&
+ !memcmp(&sc->sc_dst, dst, dst->sa_len)) {
+ splx(s);
+ return (sc);
+ }
+ }
+ splx(s);
+ return (NULL);
+}
+
+/*
+ * This function gets called when we receive an ACK for a socket in the
+ * LISTEN state. We look up the connection in the syn cache, and if it's
+ * there, we pull it out of the cache and turn it into a full-blown
+ * connection in the SYN-RECEIVED state.
+ *
+ * Arguments, as used below:
+ *
+ * src, dst addresses of the received segment; they form the cache
+ * lookup key, dst supplies the new pcb's local address and
+ * port, and src is the address the new pcb is connected to.
+ * th TCP header of the segment; th_ack and th_seq are checked
+ * against the entry's sc_iss, sc_irs and sc_win.
+ * so the listening socket, handed to sonewconn().
+ * m the received packet.
+ *
+ * The return values may not be immediately obvious, and their effects
+ * can be subtle, so here they are:
+ *
+ * NULL SYN was not found in cache; caller should drop the
+ * packet and send an RST.
+ *
+ * -1 We were unable to create the new connection, and are
+ * aborting it. An ACK,RST is being sent to the peer
+ * (unless we got screwy sequence numbers; see below),
+ * because the 3-way handshake has been completed. Caller
+ * should not free the mbuf, since we may be using it. If
+ * we are not, we will free it.
+ *
+ * Otherwise, the return value is a pointer to the new socket
+ * associated with the connection.
+ */
+struct socket *
+syn_cache_get(struct sockaddr *src, struct sockaddr *dst,
+ struct tcphdr *th, struct socket *so, struct mbuf *m)
+{
+ struct syn_cache *sc;
+ struct syn_cache_head *scp;
+ struct inpcb *inp = NULL;
+#ifdef INET6
+ struct in6pcb *in6p = NULL;
+#endif
+ struct tcpcb *tp;
+ int s;
+ struct socket *oso;
+
+ s = splsoftnet();
+ if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) {
+ splx(s);
+ return NULL;
+ }
+
+ /*
+ * Verify the sequence and ack numbers. Try getting the correct
+ * response again.
+ *
+ * The entry stays in the cache on this path; only the packet is
+ * freed and the SYN,ACK is sent once more.
+ */
+ if ((th->th_ack != sc->sc_iss + 1) ||
+ SEQ_LEQ(th->th_seq, sc->sc_irs) ||
+ SEQ_GT(th->th_seq, sc->sc_irs + 1 + sc->sc_win)) {
+ m_freem(m);
+ (void)syn_cache_respond(sc);
+ splx(s);
+ return ((struct socket *)(-1));
+ }
+
+ /* Remove this cache entry */
+ syn_cache_rm(sc);
+ splx(s);
+
+ /*
+ * Ok, create the full blown connection, and set things up
+ * as they would have been set up if we had created the
+ * connection when the SYN arrived. If we can't create
+ * the connection, abort it.
+ */
+ /*
+ * inp still has the OLD in_pcb stuff, set the
+ * v6-related flags on the new guy, too. This is
+ * done particularly for the case where an AF_INET6
+ * socket is bound only to a port, and a v4 connection
+ * comes in on that port.
+ * we also copy the flowinfo from the original pcb
+ * to the new one.
+ */
+ oso = so; /* remember the listening socket */
+ so = sonewconn(so, true); /* from here on `so' is the new socket, or NULL */
+ if (so == NULL)
+ goto resetandabort;
+
+ switch (so->so_proto->pr_domain->dom_family) {
+ case AF_INET:
+ inp = sotoinpcb(so);
+ break;
+#ifdef INET6
+ case AF_INET6:
+ in6p = sotoin6pcb(so);
+ break;
+#endif
+ }
+
+ switch (src->sa_family) {
+ case AF_INET:
+ if (inp) {
+ inp->inp_laddr = ((struct sockaddr_in *)dst)->sin_addr;
+ inp->inp_lport = ((struct sockaddr_in *)dst)->sin_port;
+ inp->inp_options = ip_srcroute(m);
+ in_pcbstate(inp, INP_BOUND);
+ if (inp->inp_options == NULL) {
+ inp->inp_options = sc->sc_ipopts; /* ownership of the saved options moves to the pcb */
+ sc->sc_ipopts = NULL;
+ }
+ }
+#ifdef INET6
+ else if (in6p) {
+ /* IPv4 packet to AF_INET6 socket */
+ memset(&in6p->in6p_laddr, 0, sizeof(in6p->in6p_laddr));
+ in6p->in6p_laddr.s6_addr16[5] = htons(0xffff);
+ bcopy(&((struct sockaddr_in *)dst)->sin_addr,
+ &in6p->in6p_laddr.s6_addr32[3],
+ sizeof(((struct sockaddr_in *)dst)->sin_addr));
+ in6p->in6p_lport = ((struct sockaddr_in *)dst)->sin_port;
+ in6totcpcb(in6p)->t_family = AF_INET;
+ if (sotoin6pcb(oso)->in6p_flags & IN6P_IPV6_V6ONLY)
+ in6p->in6p_flags |= IN6P_IPV6_V6ONLY;
+ else
+ in6p->in6p_flags &= ~IN6P_IPV6_V6ONLY;
+ in6_pcbstate(in6p, IN6P_BOUND);
+ }
+#endif
+ break;
+#ifdef INET6
+ case AF_INET6:
+ if (in6p) {
+ in6p->in6p_laddr = ((struct sockaddr_in6 *)dst)->sin6_addr;
+ in6p->in6p_lport = ((struct sockaddr_in6 *)dst)->sin6_port;
+ in6_pcbstate(in6p, IN6P_BOUND);
+ }
+ break;
+#endif
+ }
+
+#ifdef INET6
+ if (in6p && in6totcpcb(in6p)->t_family == AF_INET6 && sotoinpcb(oso)) {
+ struct in6pcb *oin6p = sotoin6pcb(oso);
+ /* inherit socket options from the listening socket */
+ in6p->in6p_flags |= (oin6p->in6p_flags & IN6P_CONTROLOPTS);
+ if (in6p->in6p_flags & IN6P_CONTROLOPTS) {
+ m_freem(in6p->in6p_options);
+ in6p->in6p_options = NULL;
+ }
+ ip6_savecontrol(in6p, &in6p->in6p_options,
+ mtod(m, struct ip6_hdr *), m);
+ }
+#endif
+
+ /*
+ * Give the new socket our cached route reference.
+ */
+ if (inp) {
+ rtcache_copy(&inp->inp_route, &sc->sc_route);
+ rtcache_free(&sc->sc_route);
+ }
+#ifdef INET6
+ else { /* NOTE(review): in6p is dereferenced here without a NULL check; relies on the new socket being AF_INET or AF_INET6 */
+ rtcache_copy(&in6p->in6p_route, &sc->sc_route);
+ rtcache_free(&sc->sc_route);
+ }
+#endif
+
+ if (inp) {
+ struct sockaddr_in sin;
+ memcpy(&sin, src, src->sa_len);
+ if (in_pcbconnect(inp, &sin, &lwp0)) {
+ goto resetandabort;
+ }
+ }
+#ifdef INET6
+ else if (in6p) {
+ struct sockaddr_in6 sin6;
+ memcpy(&sin6, src, src->sa_len);
+ if (src->sa_family == AF_INET) {
+ /* IPv4 packet to AF_INET6 socket */
+ in6_sin_2_v4mapsin6((struct sockaddr_in *)src, &sin6);
+ }
+ if (in6_pcbconnect(in6p, &sin6, NULL)) {
+ goto resetandabort;
+ }
+ }
+#endif
+ else {
+ goto resetandabort;
+ }
+
+ if (inp)
+ tp = intotcpcb(inp);
+#ifdef INET6
+ else if (in6p)
+ tp = in6totcpcb(in6p);
+#endif
+ else
+ tp = NULL; /* NOTE(review): looks unreachable -- the same condition jumped to resetandabort above; tp is dereferenced right below */
+
+ tp->t_flags = sototcpcb(oso)->t_flags & TF_NODELAY; /* only TF_NODELAY is carried over from the listener */
+ if (sc->sc_request_r_scale != 15) {
+ tp->requested_s_scale = sc->sc_requested_s_scale;
+ tp->request_r_scale = sc->sc_request_r_scale;
+ tp->snd_scale = sc->sc_requested_s_scale;
+ tp->rcv_scale = sc->sc_request_r_scale;
+ tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
+ }
+ if (sc->sc_flags & SCF_TIMESTAMP)
+ tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
+ tp->ts_timebase = sc->sc_timebase;
+
+ tp->t_template = tcp_template(tp);
+ if (tp->t_template == 0) {
+ tp = tcp_drop(tp, ENOBUFS); /* destroys socket */
+ so = NULL; /* so the abort path below skips soqremque()/soabort() */
+ m_freem(m);
+ goto abort;
+ }
+
+ tp->iss = sc->sc_iss;
+ tp->irs = sc->sc_irs;
+ tcp_sendseqinit(tp);
+ tcp_rcvseqinit(tp);
+ tp->t_state = TCPS_SYN_RECEIVED;
+ TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit);
+ TCP_STATINC(TCP_STAT_ACCEPTS);
+
+ if ((sc->sc_flags & SCF_SACK_PERMIT) && tcp_do_sack)
+ tp->t_flags |= TF_WILL_SACK;
+
+ if ((sc->sc_flags & SCF_ECN_PERMIT) && tcp_do_ecn)
+ tp->t_flags |= TF_ECN_PERMIT;
+
+#ifdef TCP_SIGNATURE
+ if (sc->sc_flags & SCF_SIGNATURE)
+ tp->t_flags |= TF_SIGNATURE;
+#endif
+
+ /* Initialize tp->t_ourmss before we deal with the peer's! */
+ tp->t_ourmss = sc->sc_ourmaxseg;
+ tcp_mss_from_peer(tp, sc->sc_peermaxseg);
+
+ /*
+ * Initialize the initial congestion window. If we
+ * had to retransmit the SYN,ACK, we must initialize cwnd
+ * to 1 segment (i.e. the Loss Window).
+ */
+ if (sc->sc_rxtshift)
+ tp->snd_cwnd = tp->t_peermss;
+ else {
+ int ss = tcp_init_win;
+ if (inp != NULL && in_localaddr(inp->inp_faddr))
+ ss = tcp_init_win_local;
+#ifdef INET6
+ if (in6p != NULL && in6_localaddr(&in6p->in6p_faddr))
+ ss = tcp_init_win_local;
+#endif
+ tp->snd_cwnd = TCP_INITIAL_WINDOW(ss, tp->t_peermss);
+ }
+
+ tcp_rmx_rtt(tp);
+ tp->snd_wl1 = sc->sc_irs;
+ tp->rcv_up = sc->sc_irs + 1;
+
+ /*
+ * This is what would have happened in tcp_output() when
+ * the SYN,ACK was sent.
+ */
+ tp->snd_up = tp->snd_una;
+ tp->snd_max = tp->snd_nxt = tp->iss+1;
+ TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur);
+ if (sc->sc_win > 0 && SEQ_GT(tp->rcv_nxt + sc->sc_win, tp->rcv_adv))
+ tp->rcv_adv = tp->rcv_nxt + sc->sc_win;
+ tp->last_ack_sent = tp->rcv_nxt;
+ tp->t_partialacks = -1;
+ tp->t_dupacks = 0;
+
+ TCP_STATINC(TCP_STAT_SC_COMPLETED);
+ s = splsoftnet();
+ syn_cache_put(sc); /* entry was unlinked above; this releases it */
+ splx(s);
+ return so;
+
+resetandabort:
+ (void)tcp_respond(NULL, m, m, th, (tcp_seq)0, th->th_ack, TH_RST);
+abort:
+ if (so != NULL) {
+ (void) soqremque(so, 1);
+ (void) soabort(so);
+ mutex_enter(softnet_lock); /* NOTE(review): presumably soabort() returns with softnet_lock released -- confirm */
+ }
+ s = splsoftnet();
+ syn_cache_put(sc);
+ splx(s);
+ TCP_STATINC(TCP_STAT_SC_ABORTED);
+ return ((struct socket *)(-1));
+}
+
+/*
+ * syn_cache_reset: called when a RST arrives for a non-existent
+ * connection. If (src, dst) has a syn cache entry and th_seq is
+ * sc_irs or sc_irs+1, remove the entry and hand it to syn_cache_put().
+ */
+
+void
+syn_cache_reset(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th)
+{
+ struct syn_cache *sc;
+ struct syn_cache_head *scp;
+ int s = splsoftnet(); /* undone by splx(s) on every return path */
+
+ if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) { /* no entry */
+ splx(s);
+ return;
+ }
+ if (SEQ_LT(th->th_seq, sc->sc_irs) || /* th_seq used as-is, no ntohl */
+ SEQ_GT(th->th_seq, sc->sc_irs+1)) { /* outside [sc_irs, sc_irs+1] */
+ splx(s);
+ return;
+ }
+ syn_cache_rm(sc);
+ TCP_STATINC(TCP_STAT_SC_RESET);
+ syn_cache_put(sc); /* calls pool_put but see spl above */
+ splx(s);
+}
+/* syn_cache_unreach: note an unreachable error against a syn cache entry. */
+void
+syn_cache_unreach(const struct sockaddr *src, const struct sockaddr *dst,
+ struct tcphdr *th)
+{
+ struct syn_cache *sc;
+ struct syn_cache_head *scp;
+ int s;
+
+ s = splsoftnet();
+ if ((sc = syn_cache_lookup(src, dst, &scp)) == NULL) {
+ splx(s);
+ return;
+ }
+ /* th_seq (after ntohl) must equal sc_iss, else it's a bogus ICMP msg */
+ if (ntohl(th->th_seq) != sc->sc_iss) {
+ splx(s);
+ return;
+ }
+
+ /*
+ * Remove the entry only if an earlier error already set SCF_UNREACH
+ * and sc_rxtshift has reached 3. Otherwise just record the error
+ * and let the entry continue; this prevents us from incorrectly
+ * nuking an entry during a spurious network outage.
+ *
+ * See tcp_notify().
+ */
+ if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxtshift < 3) {
+ sc->sc_flags |= SCF_UNREACH;
+ splx(s);
+ return;
+ }
+
+ syn_cache_rm(sc);
+ TCP_STATINC(TCP_STAT_SC_UNREACH);
+ syn_cache_put(sc); /* calls pool_put but see spl above */
+ splx(s);
+}
+
+/*
+ * syn_cache_add: given a LISTEN socket and an inbound SYN, record the
+ * request in the syn cache and send <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
+ * back to the source.
+ *
+ * Returns 1 once the mbuf has been freed here (m_freem), and 0 when
+ * option parsing or entry allocation fails; m is not freed on that path.
+ *
+ * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN.
+ * ACKing it would oblige us to hold the data for the application, so a
+ * SYN-flood attacker could send data that eventually consumes all
+ * available buffer space. By not ACKing the data, we avoid this DoS.
+ */
+int
+syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
+ unsigned int toff, struct socket *so, struct mbuf *m, u_char *optp,
+ int optlen, struct tcp_opt_info *oi)
+{
+ struct tcpcb tb, *tp; /* tb: scratch tcpcb, only partly initialized */
+ long win;
+ struct syn_cache *sc;
+ struct syn_cache_head *scp;
+ struct mbuf *ipopts;
+ int s;
+
+ tp = sototcpcb(so);
+
+ /*
+ * Initialize some local state.
+ */
+ win = sbspace(&so->so_rcv);
+ if (win > TCP_MAXWIN) /* clamp the window stored in sc_win */
+ win = TCP_MAXWIN;
+
+#ifdef TCP_SIGNATURE
+ if (optp || (tp->t_flags & TF_SIGNATURE))
+#else
+ if (optp)
+#endif
+ {
+ tb.t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0;
+#ifdef TCP_SIGNATURE
+ tb.t_flags |= (tp->t_flags & TF_SIGNATURE);
+#endif
+ tb.t_state = TCPS_LISTEN;
+ if (tcp_dooptions(&tb, optp, optlen, th, m, toff, oi) < 0)
+ return 0;
+ } else
+ tb.t_flags = 0;
+
+ switch (src->sa_family) {
+ case AF_INET:
+ /* Remember the IP options, if any. */
+ ipopts = ip_srcroute(m);
+ break;
+ default:
+ ipopts = NULL;
+ }
+
+ /*
+ * See if we already have an entry for this connection.
+ * If we do, resend the SYN,ACK. We do not count this
+ * as a retransmission (XXX though maybe we should).
+ */
+ if ((sc = syn_cache_lookup(src, dst, &scp)) != NULL) {
+ TCP_STATINC(TCP_STAT_SC_DUPESYN);
+ if (ipopts) {
+ /*
+ * If we were remembering a previous source route,
+ * forget it and use the new one we've been given.
+ */
+ if (sc->sc_ipopts)
+ (void)m_free(sc->sc_ipopts);
+ sc->sc_ipopts = ipopts;
+ }
+ sc->sc_timestamp = tb.ts_recent; /* NOTE(review): never set here if options were skipped */
+ m_freem(m);
+ if (syn_cache_respond(sc) == 0) {
+ uint64_t *tcps = TCP_STAT_GETREF();
+ tcps[TCP_STAT_SNDACKS]++;
+ tcps[TCP_STAT_SNDTOTAL]++;
+ TCP_STAT_PUTREF();
+ }
+ return 1;
+ }
+
+ s = splsoftnet();
+ sc = pool_get(&syn_cache_pool, PR_NOWAIT);
+ splx(s);
+ if (sc == NULL) {
+ if (ipopts)
+ (void)m_free(ipopts);
+ return 0;
+ }
+
+ /*
+ * Fill in the cache, and put the necessary IP and TCP
+ * options into the reply.
+ */
+ memset(sc, 0, sizeof(struct syn_cache));
+ callout_init(&sc->sc_timer, CALLOUT_MPSAFE);
+ memcpy(&sc->sc_src, src, src->sa_len);
+ memcpy(&sc->sc_dst, dst, dst->sa_len);
+ sc->sc_flags = 0;
+ sc->sc_ipopts = ipopts;
+ sc->sc_irs = th->th_seq; /* sequence number from the inbound SYN */
+ switch (src->sa_family) {
+ case AF_INET:
+ {
+ struct sockaddr_in *srcin = (void *)src;
+ struct sockaddr_in *dstin = (void *)dst;
+
+ sc->sc_iss = tcp_new_iss1(&dstin->sin_addr,
+ &srcin->sin_addr, dstin->sin_port,
+ srcin->sin_port, sizeof(dstin->sin_addr));
+ break;
+ }
+#ifdef INET6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *srcin6 = (void *)src;
+ struct sockaddr_in6 *dstin6 = (void *)dst;
+
+ sc->sc_iss = tcp_new_iss1(&dstin6->sin6_addr,
+ &srcin6->sin6_addr, dstin6->sin6_port,
+ srcin6->sin6_port, sizeof(dstin6->sin6_addr));
+ break;
+ }
+#endif
+ }
+ sc->sc_peermaxseg = oi->maxseg;
+ sc->sc_ourmaxseg = tcp_mss_to_advertise(m->m_flags & M_PKTHDR ?
+ m_get_rcvif_NOMPSAFE(m) : NULL, sc->sc_src.sa.sa_family);
+ sc->sc_win = win;
+ sc->sc_timebase = tcp_now - 1; /* see tcp_newtcpcb() */
+ sc->sc_timestamp = tb.ts_recent; /* only sent if SCF_TIMESTAMP is set */
+ if ((tb.t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP)) ==
+ (TF_REQ_TSTMP|TF_RCVD_TSTMP))
+ sc->sc_flags |= SCF_TIMESTAMP;
+ if ((tb.t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+ sc->sc_requested_s_scale = tb.requested_s_scale;
+ sc->sc_request_r_scale = 0;
+ /*
+ * Pick the smallest possible scaling factor that
+ * will still allow us to scale up to sb_max.
+ *
+ * We do this because there are broken firewalls that
+ * will corrupt the window scale option, leading to
+ * the other endpoint believing that our advertised
+ * window is unscaled. At scale factors larger than
+ * 5 the unscaled window will drop below 1500 bytes,
+ * leading to serious problems when traversing these
+ * broken firewalls.
+ *
+ * With the default sbmax of 256K, a scale factor
+ * of 3 will be chosen by this algorithm. Those who
+ * choose a larger sbmax should watch out
+ * for the compatibility problems mentioned above.
+ *
+ * RFC1323: The Window field in a SYN (i.e., a <SYN>
+ * or <SYN,ACK>) segment itself is never scaled.
+ */
+ while (sc->sc_request_r_scale < TCP_MAX_WINSHIFT &&
+ (TCP_MAXWIN << sc->sc_request_r_scale) < sb_max)
+ sc->sc_request_r_scale++;
+ } else {
+ sc->sc_requested_s_scale = 15;
+ sc->sc_request_r_scale = 15; /* 15: syn_cache_respond() omits the window option */
+ }
+ if ((tb.t_flags & TF_SACK_PERMIT) && tcp_do_sack)
+ sc->sc_flags |= SCF_SACK_PERMIT;
+
+ /*
+ * ECN setup packet received.
+ */
+ if ((th->th_flags & (TH_ECE|TH_CWR)) && tcp_do_ecn) /* NOTE(review): true if either flag is set */
+ sc->sc_flags |= SCF_ECN_PERMIT;
+
+#ifdef TCP_SIGNATURE
+ if (tb.t_flags & TF_SIGNATURE)
+ sc->sc_flags |= SCF_SIGNATURE;
+#endif
+ sc->sc_tp = tp;
+ m_freem(m);
+ if (syn_cache_respond(sc) == 0) {
+ uint64_t *tcps = TCP_STAT_GETREF();
+ tcps[TCP_STAT_SNDACKS]++;
+ tcps[TCP_STAT_SNDTOTAL]++;
+ TCP_STAT_PUTREF();
+ syn_cache_insert(sc, tp); /* entry is cached only after a successful send */
+ } else {
+ s = splsoftnet();
+ /*
+ * syn_cache_put() will try to schedule the timer, so
+ * we need to initialize it
+ */
+ syn_cache_timer_arm(sc);
+ syn_cache_put(sc);
+ splx(s);
+ TCP_STATINC(TCP_STAT_SC_DROPPED);
+ }
+ return 1;
+}
+
+/*
+ * syn_cache_respond: build a SYN+ACK for sc from scratch and (re)send it.
+ *
+ * Returns 0 on success, else EAFNOSUPPORT, ENOBUFS, EPERM (no signature
+ * SA) or the error from ip_output()/ip6_output().
+ */
+int
+syn_cache_respond(struct syn_cache *sc)
+{
+#ifdef INET6
+ struct rtentry *rt = NULL;
+#endif
+ struct route *ro;
+ u_int8_t *optp;
+ int optlen, error;
+ u_int16_t tlen;
+ struct ip *ip = NULL;
+#ifdef INET6
+ struct ip6_hdr *ip6 = NULL;
+#endif
+ struct tcpcb *tp;
+ struct tcphdr *th;
+ struct mbuf *m;
+ u_int hlen;
+#ifdef TCP_SIGNATURE
+ struct secasvar *sav = NULL;
+ u_int8_t *sigp = NULL;
+#endif
+
+ ro = &sc->sc_route;
+ switch (sc->sc_src.sa.sa_family) {
+ case AF_INET:
+ hlen = sizeof(struct ip);
+ break;
+#ifdef INET6
+ case AF_INET6:
+ hlen = sizeof(struct ip6_hdr);
+ break;
+#endif
+ default:
+ return EAFNOSUPPORT;
+ }
+
+ /* Worst case scenario, since we don't know the option size yet. */
+ tlen = hlen + sizeof(struct tcphdr) + MAX_TCPOPTLEN;
+ KASSERT(max_linkhdr + tlen <= MCLBYTES);
+
+ /*
+ * Create the IP+TCP header from scratch.
+ */
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
+ if (m && (max_linkhdr + tlen) > MHLEN) { /* header mbuf too small: attach a cluster */
+ MCLGET(m, M_DONTWAIT);
+ if ((m->m_flags & M_EXT) == 0) {
+ m_freem(m);
+ m = NULL;
+ }
+ }
+ if (m == NULL)
+ return ENOBUFS;
+ MCLAIM(m, &tcp_tx_mowner);
+
+ tp = sc->sc_tp;
+
+ /* Fixup the mbuf. */
+ m->m_data += max_linkhdr;
+ m_reset_rcvif(m);
+ memset(mtod(m, void *), 0, tlen);
+
+ switch (sc->sc_src.sa.sa_family) {
+ case AF_INET:
+ ip = mtod(m, struct ip *);
+ ip->ip_v = 4;
+ ip->ip_dst = sc->sc_src.sin.sin_addr; /* reply goes back to the SYN's source */
+ ip->ip_src = sc->sc_dst.sin.sin_addr;
+ ip->ip_p = IPPROTO_TCP;
+ th = (struct tcphdr *)(ip + 1);
+ th->th_dport = sc->sc_src.sin.sin_port;
+ th->th_sport = sc->sc_dst.sin.sin_port;
+ break;
+#ifdef INET6
+ case AF_INET6:
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6->ip6_vfc = IPV6_VERSION;
+ ip6->ip6_dst = sc->sc_src.sin6.sin6_addr;
+ ip6->ip6_src = sc->sc_dst.sin6.sin6_addr;
+ ip6->ip6_nxt = IPPROTO_TCP;
+ /* ip6_plen will be updated in ip6_output() */
+ th = (struct tcphdr *)(ip6 + 1);
+ th->th_dport = sc->sc_src.sin6.sin6_port;
+ th->th_sport = sc->sc_dst.sin6.sin6_port;
+ break;
+#endif
+ default:
+ panic("%s: impossible (1)", __func__);
+ }
+
+ th->th_seq = htonl(sc->sc_iss);
+ th->th_ack = htonl(sc->sc_irs + 1);
+ th->th_flags = TH_SYN|TH_ACK;
+ th->th_win = htons(sc->sc_win);
+ /* th_x2, th_sum, th_urp already 0 from memset */
+
+ /* Tack on the TCP options. */
+ optp = (u_int8_t *)(th + 1);
+ optlen = 0;
+ *optp++ = TCPOPT_MAXSEG;
+ *optp++ = TCPOLEN_MAXSEG;
+ *optp++ = (sc->sc_ourmaxseg >> 8) & 0xff;
+ *optp++ = sc->sc_ourmaxseg & 0xff;
+ optlen += TCPOLEN_MAXSEG;
+
+ if (sc->sc_request_r_scale != 15) { /* 15 is stored by syn_cache_add() when scaling is off */
+ *((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 |
+ TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 |
+ sc->sc_request_r_scale);
+ optp += TCPOLEN_WINDOW + TCPOLEN_NOP;
+ optlen += TCPOLEN_WINDOW + TCPOLEN_NOP;
+ }
+
+ if (sc->sc_flags & SCF_SACK_PERMIT) {
+ /* Let the peer know that we will SACK. */
+ *optp++ = TCPOPT_SACK_PERMITTED;
+ *optp++ = TCPOLEN_SACK_PERMITTED;
+ optlen += TCPOLEN_SACK_PERMITTED;
+ }
+
+ if (sc->sc_flags & SCF_TIMESTAMP) {
+ while (optlen % 4 != 2) { /* NOP-pad so the two 32-bit values start at optlen % 4 == 0 */
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ *optp++ = TCPOPT_TIMESTAMP;
+ *optp++ = TCPOLEN_TIMESTAMP;
+ u_int32_t *lp = (u_int32_t *)(optp);
+ /* Form timestamp option as shown in appendix A of RFC 1323. */
+ *lp++ = htonl(SYN_CACHE_TIMESTAMP(sc));
+ *lp = htonl(sc->sc_timestamp);
+ optp += TCPOLEN_TIMESTAMP - 2; /* kind and length bytes were already stepped over */
+ optlen += TCPOLEN_TIMESTAMP;
+ }
+
+#ifdef TCP_SIGNATURE
+ if (sc->sc_flags & SCF_SIGNATURE) {
+ sav = tcp_signature_getsav(m);
+ if (sav == NULL) {
+ m_freem(m);
+ return EPERM;
+ }
+
+ *optp++ = TCPOPT_SIGNATURE;
+ *optp++ = TCPOLEN_SIGNATURE;
+ sigp = optp; /* digest is written here by tcp_signature() below */
+ memset(optp, 0, TCP_SIGLEN);
+ optp += TCP_SIGLEN;
+ optlen += TCPOLEN_SIGNATURE;
+ }
+#endif
+
+ /*
+ * Terminate and pad TCP options to a 4 byte boundary.
+ *
+ * According to RFC793: "The content of the header beyond the
+ * End-of-Option option must be header padding (i.e., zero)."
+ * And later: "The padding is composed of zeros."
+ */
+ if (optlen % 4) {
+ optlen += TCPOLEN_EOL;
+ *optp++ = TCPOPT_EOL;
+ }
+ while (optlen % 4) {
+ optlen += TCPOLEN_PAD;
+ *optp++ = TCPOPT_PAD;
+ }
+
+ /* Compute the actual values now that we've added the options. */
+ tlen = hlen + sizeof(struct tcphdr) + optlen;
+ m->m_len = m->m_pkthdr.len = tlen;
+ th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
+
+#ifdef TCP_SIGNATURE
+ if (sav) {
+ (void)tcp_signature(m, th, hlen, sav, sigp);
+ key_sa_recordxfer(sav, m);
+ KEY_SA_UNREF(&sav);
+ }
+#endif
+
+ /*
+ * Send ECN SYN-ACK setup packet.
+ * Routes can be asymmetric, so, even if we receive a packet
+ * with ECE and CWR set, we must not assume no one will block
+ * the ECE packet we are about to send.
+ */
+ if ((sc->sc_flags & SCF_ECN_PERMIT) && tp &&
+ SEQ_GEQ(tp->snd_nxt, tp->snd_max)) {
+ th->th_flags |= TH_ECE;
+ TCP_STATINC(TCP_STAT_ECN_SHS);
+
+ /*
+ * draft-ietf-tcpm-ecnsyn-00.txt
+ *
+ * "[...] a TCP node MAY respond to an ECN-setup
+ * SYN packet by setting ECT in the responding
+ * ECN-setup SYN/ACK packet, indicating to routers
+ * that the SYN/ACK packet is ECN-Capable.
+ * This allows a congested router along the path
+ * to mark the packet instead of dropping the
+ * packet as an indication of congestion."
+ *
+ * "[...] There can be a great benefit in setting
+ * an ECN-capable codepoint in SYN/ACK packets [...]
+ * Congestion is most likely to occur in
+ * the server-to-client direction. As a result,
+ * setting an ECN-capable codepoint in SYN/ACK
+ * packets can reduce the occurrence of three-second
+ * retransmit timeouts resulting from the drop
+ * of SYN/ACK packets."
+ *
+ * Page 4 and 6, January 2006.
+ */
+
+ switch (sc->sc_src.sa.sa_family) {
+ case AF_INET:
+ ip->ip_tos |= IPTOS_ECN_ECT0;
+ break;
+#ifdef INET6
+ case AF_INET6:
+ ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+ break;
+#endif
+ }
+ TCP_STATINC(TCP_STAT_ECN_ECT);
+ }
+
+
+ /*
+ * Compute the packet's checksum.
+ *
+ * Fill in some straggling IP bits. ip_len is stored with htons():
+ * first the TCP length, then, after the checksum, the full length.
+ */
+ switch (sc->sc_src.sa.sa_family) {
+ case AF_INET:
+ ip->ip_len = htons(tlen - hlen);
+ th->th_sum = 0;
+ th->th_sum = in4_cksum(m, IPPROTO_TCP, hlen, tlen - hlen);
+ ip->ip_len = htons(tlen);
+ ip->ip_ttl = ip_defttl;
+ /* XXX tos? */
+ break;
+#ifdef INET6
+ case AF_INET6:
+ ip6->ip6_plen = htons(tlen - hlen);
+ th->th_sum = 0;
+ th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen, tlen - hlen);
+ ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
+ ip6->ip6_vfc |= IPV6_VERSION;
+ ip6->ip6_plen = htons(tlen - hlen); /* same value as stored before the checksum */
+ /* ip6_hlim will be initialized afterwards */
+ /* XXX flowlabel? */
+ break;
+#endif
+ }
+
+ /* XXX use IPsec policy on listening socket, on SYN ACK */
+ tp = sc->sc_tp; /* same value as assigned near the top of the function */
+
+ switch (sc->sc_src.sa.sa_family) {
+ case AF_INET:
+ error = ip_output(m, sc->sc_ipopts, ro,
+ (ip_mtudisc ? IP_MTUDISC : 0),
+ NULL, tp ? tp->t_inpcb : NULL);
+ break;
+#ifdef INET6
+ case AF_INET6:
+ ip6->ip6_hlim = in6_selecthlim(NULL,
+ (rt = rtcache_validate(ro)) != NULL ? rt->rt_ifp : NULL);
+ rtcache_unref(rt, ro); /* rt was only needed to pick the hop limit */
+
+ error = ip6_output(m, NULL /*XXX*/, ro, 0, NULL,
+ tp ? tp->t_in6pcb : NULL, NULL);
+ break;
+#endif
+ default:
+ panic("%s: impossible (2)", __func__);
+ }
+
+ return error;
+}
Index: src/sys/netinet/tcp_syncache.h
diff -u /dev/null src/sys/netinet/tcp_syncache.h:1.1
--- /dev/null Tue Sep 20 07:19:15 2022
+++ src/sys/netinet/tcp_syncache.h Tue Sep 20 07:19:14 2022
@@ -0,0 +1,222 @@
+/* $NetBSD: tcp_syncache.h,v 1.1 2022/09/20 07:19:14 ozaki-r Exp $ */
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995
+ *
+ * NRL grants permission for redistribution and use in source and binary
+ * forms, with or without modification, of the software and documentation
+ * created at NRL provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgements:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * This product includes software developed at the Information
+ * Technology Division, US Naval Research Laboratory.
+ * 4. Neither the name of the NRL nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
+ * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation
+ * are those of the authors and should not be interpreted as representing
+ * official policies, either expressed or implied, of the US Naval
+ * Research Laboratory (NRL).
+ */
+
+/*-
+ * Copyright (c) 1997, 1998, 1999, 2001, 2005 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
+ * NASA Ames Research Center.
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Charles M. Hannum.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1982, 1986, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
+ */
+
+#ifndef _NETINET_TCP_SYNCACHE_H_
+#define _NETINET_TCP_SYNCACHE_H_
+
+#if defined(_KERNEL_OPT)
+#include "opt_inet.h"
+#include "opt_mbuftrace.h"
+#endif
+
+#ifdef _KERNEL
+#include <sys/callout.h>
+#include <sys/mbuf.h>
+#include <sys/queue.h>
+
+#include <net/route.h>
+
+/*
+ * Data for the TCP compressed state engine (the syn cache).
+ */
+union syn_cache_sa {
+ struct sockaddr sa; /* generic view; sa.sa_family selects sin or sin6 */
+ struct sockaddr_in sin; /* AF_INET address and port */
+#if 1 /*def INET6*/
+ struct sockaddr_in6 sin6; /* AF_INET6 address and port */
+#endif
+};
+
+struct syn_cache { /* one pending connection request */
+ TAILQ_ENTRY(syn_cache) sc_bucketq; /* link on bucket list */
+ callout_t sc_timer; /* rexmt timer */
+ struct route sc_route; /* route handed to ip_output()/ip6_output() */
+ long sc_win; /* advertised window */
+ int sc_bucketidx; /* our bucket index */
+ u_int32_t sc_hash; /* NOTE(review): presumably hash of the address pair - confirm */
+ u_int32_t sc_timestamp; /* timestamp from SYN */
+ u_int32_t sc_timebase; /* our local timebase */
+ union syn_cache_sa sc_src; /* source of the SYN (the peer) */
+ union syn_cache_sa sc_dst; /* destination of the SYN (local side) */
+ tcp_seq sc_irs; /* sequence number taken from the SYN */
+ tcp_seq sc_iss; /* sequence number sent in our SYN,ACK */
+ u_int sc_rxtcur; /* current rxt timeout */
+ u_int sc_rxttot; /* total time spent on queues */
+ u_short sc_rxtshift; /* for computing backoff */
+ u_short sc_flags; /* SCF_* bits below */
+
+#define SCF_UNREACH 0x0001 /* we've had an unreach error */
+#define SCF_TIMESTAMP 0x0002 /* peer will do timestamps */
+#define SCF_DEAD 0x0004 /* this entry to be released */
+#define SCF_SACK_PERMIT 0x0008 /* peer will do SACK */
+#define SCF_ECN_PERMIT 0x0010 /* peer will do ECN */
+#define SCF_SIGNATURE 0x40 /* send MD5 digests */
+
+ struct mbuf *sc_ipopts; /* IP options */
+ u_int16_t sc_peermaxseg; /* MSS parsed from the SYN's options */
+ u_int16_t sc_ourmaxseg; /* MSS we put in the SYN,ACK */
+ u_int8_t sc_request_r_scale : 4, /* window scale we offer; 15 = none */
+ sc_requested_s_scale : 4; /* scale from the SYN's options; 15 = none */
+
+ struct tcpcb *sc_tp; /* tcb for listening socket */
+ LIST_ENTRY(syn_cache) sc_tpq; /* list of entries by same tp */
+};
+/* One syn cache hash bucket: a tail queue of entries plus its length. */
+struct syn_cache_head {
+ TAILQ_HEAD(, syn_cache) sch_bucket; /* bucket entries */
+ u_short sch_length; /* # entries in bucket */
+};
+
+extern int tcp_syn_bucket_limit;/* max entries per hash bucket */
+extern int tcp_syn_cache_limit; /* max entries for compressed state engine */
+extern u_long syn_cache_count; /* NOTE(review): presumably current number of entries - confirm */
+
+int syn_cache_add(struct sockaddr *, struct sockaddr *,
+ struct tcphdr *, unsigned int, struct socket *,
+ struct mbuf *, u_char *, int, struct tcp_opt_info *); /* 1 if the mbuf was freed, else 0 */
+void syn_cache_unreach(const struct sockaddr *, const struct sockaddr *,
+ struct tcphdr *); /* th_seq is passed through ntohl() by the callee */
+struct socket *syn_cache_get(struct sockaddr *, struct sockaddr *,
+ struct tcphdr *, struct socket *so, struct mbuf *);
+void syn_cache_init(void);
+void syn_cache_insert(struct syn_cache *, struct tcpcb *);
+struct syn_cache *syn_cache_lookup(const struct sockaddr *, const struct sockaddr *,
+ struct syn_cache_head **); /* NULL when no entry matches */
+void syn_cache_reset(struct sockaddr *, struct sockaddr *,
+ struct tcphdr *); /* th_seq is compared as-is by the callee */
+int syn_cache_respond(struct syn_cache *); /* 0 on success, else an errno value */
+void syn_cache_cleanup(struct tcpcb *);
+#endif
+
+#endif /* !_NETINET_TCP_SYNCACHE_H_ */