--- Begin Message ---
Here's a quickie for the people who have been plagued with high bandwidth
syn flood attacks, a kernel patch for FreeBSD 3.1-STABLE which rate limits
SYN processing. It's messy but functional and I don't have time to make it
better (that's the fbsd developers' job, not mine :P), cd /usr/src/sys,
patch < synlim, add "options SYN_RATELIM" (I highly recommend ICMP_BANDLIM
as well) to your kernel, recompile, and sysctl net.inet.tcp.synlim will be
available (defaults to 100). This is the maximum number of SYNs per second
that will be processed, the rest will be silently discarded. On my test
system (P2 450 running 3.1-stable being hit w/15,000 packets per sec),
this has successfully brought CPU usage from 100% to ~20% (against an open
port which is replying with unacknowledged ACKs).
Which brings us to the more sticky topic of kernel panics when under SYN
flood (which I believe to be the cause of some earlier posts from certain
people at Exodus Communications *cough*). Lord knows I found enough of
them when doing this testing, but the one that seems to be the biggie for
crashing when under syn flood is as follows (heh just turned off the
synlim and panic'd within 8 seconds while writing this):
panic: free: multiple frees
(kgdb) bt
#0 boot (howto=256) at ../../kern/kern_shutdown.c:285
#1 0xc0138c09 in panic (fmt=0xc02192b7 "free: multiple frees")
at ../../kern/kern_shutdown.c:446
#2 0xc0135aaf in free (addr=0xc0cdd600, type=0xc0239330)
at ../../kern/kern_malloc.c:333
#3 0xc01768f4 in ifafree (ifa=0xc0cdd600) at ../../net/route.c:262
#4 0xc0176876 in rtfree (rt=0xc34ce700) at ../../net/route.c:236
#5 0xc0176c84 in rtrequest (req=2, dst=0xc34cbac0, gateway=0xc34cbad0,
netmask=0x0, flags=393223, ret_nrt=0x0) at ../../net/route.c:536
#6 0xc017b34d in in_rtqkill (rn=0xc34ce700, rock=0xc0231610)
at ../../netinet/in_rmx.c:242
#7 0xc0176064 in rn_walktree (h=0xc0cd9e00, f=0xc017b2fc <in_rtqkill>,
w=0xc0231610) at ../../net/radix.c:956
#8 0xc017b3ec in in_rtqtimo (rock=0xc0cd9e00) at ../../netinet/in_rmx.c:283
#9 0xc013d19b in softclock () at ../../kern/kern_timeout.c:124
Which after a quick examination seems to be a periodic routing table
cleanup. It seems that in_rtqtimo is scheduled to run every
net.inet.ip.rtexpire seconds (which is dynamically adjusted and can never go
lower than net.inet.ip.rtminexpire). When the system is under heavy load
from processing lots of small packets (they don't even have to be SYNs,
anything which can get routed will do the trick, though the packet kiddies
would get very little gain from just sending an ip header since it's going
to be padded to 64 bytes for the eth frame anyhow), this route cleanup
code will go whacking at routes it shouldn't and free some memory twice. In
the course of testing I've gotten my rtq_reallyold to -3 and seen lots of
"tvotohz: negative time difference -2 sec 0 usec". Perhaps someone with
free time or more specific knowledge of this area would like to FIX IT? =)
Perhaps when I get more free time I'll test some other *nixes. I would
really recommend putting all this rate limiting code at an ipfw level.
If you would like to contact me regarding this please use
hum...@quadrunner.com (at least if you want a quick reply), thanks.
--
Richard Steenbergen <hum...@lightning.net> hum...@efnet PGP ID: 0x741D0374
PGP Key Fingerprint: C6EF EFA0 83B2 071F 1AB6 B879 1F70 4303 741D 0374
http://users.quadrunner.com/humble
*** conf/options.old Sat May 15 23:08:03 1999
--- conf/options Sat May 15 23:40:21 1999
***************
*** 68,73 ****
--- 68,74 ----
SYSVSHM opt_sysvipc.h
UCONSOLE
ICMP_BANDLIM
+ SYN_RATELIM
# POSIX kernel options
P1003_1B opt_posix.h
*** netinet/tcp_var.h.old Sat May 15 23:25:39 1999
--- netinet/tcp_var.h Sat May 15 23:45:05 1999
***************
*** 40,45 ****
--- 40,49 ----
* Kernel variables for tcp.
*/
+ #ifdef KERNEL
+ #include "opt_syn_ratelim.h"
+ #endif
+
/*
* Tcp control block, one per tcp; fields:
* Organized for 16 byte cacheline efficiency.
***************
*** 305,311 ****
#define TCPCTL_RECVSPACE 9 /* receive buffer space */
#define TCPCTL_KEEPINIT 10 /* receive buffer space */
#define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs
*/
! #define TCPCTL_MAXID 12
#define TCPCTL_NAMES { \
{ 0, 0 }, \
--- 309,316 ----
#define TCPCTL_RECVSPACE 9 /* receive buffer space */
#define TCPCTL_KEEPINIT 10 /* receive buffer space */
#define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs
*/
! #define TCPCTL_SYNLIM 12 /* Rate limiting of SYNs */
! #define TCPCTL_MAXID 13
#define TCPCTL_NAMES { \
{ 0, 0 }, \
***************
*** 320,325 ****
--- 325,331 ----
{ "recvspace", CTLTYPE_INT }, \
{ "keepinit", CTLTYPE_INT }, \
{ "pcblist", CTLTYPE_STRUCT }, \
+ { "synlim", CTLTYPE_INT }, \
}
#ifdef KERNEL
*** netinet/tcp_input.c.old Sat May 15 23:08:10 1999
--- netinet/tcp_input.c Sun May 16 01:33:51 1999
***************
*** 72,77 ****
--- 72,85 ----
static struct tcpiphdr tcp_saveti;
#endif
+ #ifdef SYN_RATELIM
+ static int synlim = 100;
+ SYSCTL_INT(_net_inet_tcp, TCPCTL_SYNLIM, synlim, CTLFLAG_RW, &synlim, 0, "");
+ #else
+ static int synlim = -1;
+ SYSCTL_INT(_net_inet_tcp, TCPCTL_SYNLIM, synlim, CTLFLAG_RD, &synlim, 0, "");
+ #endif
+
static int tcprexmtthresh = 3;
tcp_seq tcp_iss;
tcp_cc tcp_ccgen;
***************
*** 98,104 ****
struct tcpiphdr *, struct mbuf *));
static int tcp_reass __P((struct tcpcb *, struct tcpiphdr *, struct mbuf
*));
static void tcp_xmit_timer __P((struct tcpcb *, int));
!
/*
* Insert segment ti into reassembly queue of tcp with
--- 106,112 ----
struct tcpiphdr *, struct mbuf *));
static int tcp_reass __P((struct tcpcb *, struct tcpiphdr *, struct mbuf
*));
static void tcp_xmit_timer __P((struct tcpcb *, int));
! static int syn_ratelim(void);
/*
* Insert segment ti into reassembly queue of tcp with
***************
*** 130,135 ****
--- 138,183 ----
} \
}
+ #ifdef SYN_RATELIM
+ int syn_ratelim(void) /* returns 0 = within limit, -1 = over synlim */
+ {
+ static int lticks; /* value of ticks when the current window began */
+ static int lpackets; /* calls counted so far in the current window */
+ int dticks; /* ticks elapsed since lticks */
+
+ /*
+ * A synlim of zero or less means the limiter is off (or the
+ * setting is out of range): always report ok.
+ */
+
+ if (synlim <= 0)
+ return(0);
+
+ dticks = ticks - lticks;
+
+ /*
+ * More than hz ticks gone: log any overrun, restart the window.
+ */
+
+ if ((unsigned int)dticks > hz) {
+ if (lpackets > synlim)
+ printf("syn rate limit reached %d/%d pps\n", lpackets,
synlim);
+ lticks = ticks;
+ lpackets = 0;
+ }
+
+ /*
+ * Count this call; past synlim in the window, report -1.
+ */
+
+ if (++lpackets > synlim) {
+ return(-1);
+ }
+
+ return(0);
+ }
+ #endif
+
static int
tcp_reass(tp, ti, m)
register struct tcpcb *tp;
***************
*** 379,384 ****
--- 427,438 ----
ip_fw_fwd_addr = NULL;
} else
#endif /* IPFIREWALL_FORWARD */
+
+ #ifdef SYN_RATELIM
+ if ((tiflags & TH_SYN) && !(tiflags & TH_ACK))
+ if (syn_ratelim() < 0)
+ goto drop;
+ #endif
inp = in_pcblookup_hash(&tcbinfo, ti->ti_src, ti->ti_sport,
ti->ti_dst, ti->ti_dport, 1);
--- End Message ---