Author: cieciwa Date: Thu May 11 10:38:19 2006 GMT Module: SOURCES Tag: LINUX_2_6 ---- Log message: - kernel connrate module 20060504 snap.
---- Files affected: SOURCES: pom-ng-connrate-20060504.patch (NONE -> 1.1.2.1) (NEW) ---- Diffs: ================================================================ Index: SOURCES/pom-ng-connrate-20060504.patch diff -u /dev/null SOURCES/pom-ng-connrate-20060504.patch:1.1.2.1 --- /dev/null Thu May 11 12:38:19 2006 +++ SOURCES/pom-ng-connrate-20060504.patch Thu May 11 12:38:14 2006 @@ -0,0 +1,318 @@ + include/linux/netfilter_ipv4/ip_conntrack_rate.h | 32 +++++ + include/linux/netfilter_ipv4/ipt_connrate.h | 12 ++ + net/ipv4/netfilter/Kconfig | 29 +++++ + net/ipv4/netfilter/Makefile | 2 + net/ipv4/netfilter/ip_conntrack_rate.c | 133 +++++++++++++++++++++++ + net/ipv4/netfilter/ipt_connrate.c | 73 ++++++++++++ + 6 files changed, 281 insertions(+) + +diff -Nur --exclude '*.orig' linux.org/include/linux/netfilter_ipv4/ip_conntrack_rate.h linux/include/linux/netfilter_ipv4/ip_conntrack_rate.h +--- linux.org/include/linux/netfilter_ipv4/ip_conntrack_rate.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux/include/linux/netfilter_ipv4/ip_conntrack_rate.h 2006-05-11 12:35:34.000000000 +0200 +@@ -0,0 +1,32 @@ ++#ifndef _IP_CONNTRACK_RATE_H ++#define _IP_CONNTRACK_RATE_H ++ ++/* estimation interval, in jiffies */ ++#define IP_CONNTRACK_RATE_INTERVAL (3 * HZ) ++ ++/* scale on how many tokens per byte to generate */ ++#define IP_CONNTRACK_RATE_SCALE 100 ++ ++/* per conntrack: transfer rate in connection */ ++struct ip_conntrack_rate { ++ /* jiffies of previous received packet */ ++ unsigned long prev; ++ /* average rate of tokens per jiffy */ ++ u_int32_t avgrate; ++}; ++ ++#ifdef __KERNEL__ ++ ++/* Count a packet of len into given rate structure. */ ++extern void ++ip_conntrack_rate_count(struct ip_conntrack_rate *ctr, unsigned int len); ++ ++/* Return current rate as bytes per second. Note that the returned ++ rate is the rate at last received packet, not counting time that ++ has passed after it. 
*/ ++extern u_int32_t ++ip_conntrack_rate_get(struct ip_conntrack_rate *ctr); ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* _IP_CONNTRACK_RATE_H */ +diff -Nur --exclude '*.orig' linux.org/include/linux/netfilter_ipv4/ipt_connrate.h linux/include/linux/netfilter_ipv4/ipt_connrate.h +--- linux.org/include/linux/netfilter_ipv4/ipt_connrate.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux/include/linux/netfilter_ipv4/ipt_connrate.h 2006-05-11 12:35:34.000000000 +0200 +@@ -0,0 +1,12 @@ ++#ifndef _IPT_CONNRATE_H ++#define _IPT_CONNRATE_H ++ ++struct ipt_connrate_info ++{ ++ /* Per connection transfer rate, in bytes per second. If ++ 'from' is smaller or equal to 'to', rate is matched to be ++ inside the inclusive range [from,to], otherwise rate is ++ matched to be outside the inclusive range [to,from]. */ ++ u_int32_t from, to; ++}; ++#endif +diff -Nur --exclude '*.orig' linux.org/net/ipv4/netfilter/Kconfig linux/net/ipv4/netfilter/Kconfig +--- linux.org/net/ipv4/netfilter/Kconfig 2006-05-02 23:38:44.000000000 +0200 ++++ linux/net/ipv4/netfilter/Kconfig 2006-05-11 12:35:34.000000000 +0200 +@@ -606,5 +606,34 @@ + Allows altering the ARP packet payload: source and destination + hardware and network addresses. + ++config IP_NF_CONNTRACK_RATE ++ bool "Connection rate estimation" ++ depends on IP_NF_CONNTRACK ++ help ++ ++ This enables per connection transfer rate estimation in connection ++ tracking code. This enlarges the amount of memory required by each ++ connection tracked a bit and adds the overhead of calculating the ++ transmission rate on every received packet. ++ ++ This is required to be able to match on the per connection transfer ++ rate, and can be a nice statistic to see in the connection tracking ++ table, but is useless otherwise. ++ ++ If unsure, say N. 
++ ++config IP_NF_MATCH_CONNRATE ++ tristate "Connection rate match support" ++ depends on IP_NF_CONNTRACK_RATE && IP_NF_CONNTRACK && IP_NF_IPTABLES ++ help ++ This allows matching on the transfer rate on a per connection basis. ++ ++ Connection transfer rate estimation is performed separately by the ++ connection tracking code and is unaffected by the presence of matches ++ on it. Several connection rate matches may match a single packet and ++ every match will see the same rate. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ + endmenu + +diff -Nur --exclude '*.orig' linux.org/net/ipv4/netfilter/Makefile linux/net/ipv4/netfilter/Makefile +--- linux.org/net/ipv4/netfilter/Makefile 2006-05-02 23:38:44.000000000 +0200 ++++ linux/net/ipv4/netfilter/Makefile 2006-05-11 12:35:34.000000000 +0200 +@@ -0,0 +0,2 @@ ++obj-$(CONFIG_IP_NF_CONNTRACK_RATE) += ip_conntrack_rate.o ++obj-$(CONFIG_IP_NF_MATCH_CONNRATE) += ipt_connrate.o +diff -Nur --exclude '*.orig' linux.org/net/ipv4/netfilter/ip_conntrack_rate.c linux/net/ipv4/netfilter/ip_conntrack_rate.c +--- linux.org/net/ipv4/netfilter/ip_conntrack_rate.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux/net/ipv4/netfilter/ip_conntrack_rate.c 2006-05-11 12:35:34.000000000 +0200 +@@ -0,0 +1,133 @@ ++/* Connection transfer rate estimator for netfilter. ++ * ++ * Copyright (c) 2004 Nuutti Kotivuori <[EMAIL PROTECTED]> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include <linux/config.h> ++#include <linux/module.h> ++#include <linux/types.h> ++#include <linux/skbuff.h> ++#include <linux/jiffies.h> ++#include <linux/netfilter_ipv4/ip_conntrack_rate.h> ++ ++/* ++ I wanted to build a simpler and more robust rate estimator than the ++ one used in sched/estimator.c. 
After evaluating a few choices I ++ settled with the one given in an example in [RFC2859], which is the ++ rate estimator described in [TON98]. ++ ++ I will copy the example table from [RFC2859] here: ++ ++======================================================================== ++|Initially: | ++| | ++| AVG_INTERVAL = a constant; | ++| avg-rate = CTR; | ++| t-front = 0; | ++| | ++|Upon each packet's arrival, the rate estimator updates its variables: | ++| | ++| Bytes_in_win = avg-rate * AVG_INTERVAL; | ++| New_bytes = Bytes_in_win + pkt_size; | ++| avg-rate = New_bytes/( now - t-front + AVG_INTERVAL); | ++| t-front = now; | ++| | ++|Where: | ++| now = The time of the current packet arrival | ++| pkt_size = The packet size in bytes of the arriving packet | ++| avg-rate = Measured Arrival Rate of traffic stream | ++| AVG_INTERVAL = Time window over which history is kept | ++| | ++| | ++| Figure 2. Example Rate Estimator Algorithm | ++| | ++======================================================================== ++ ++ Additionally we have to be concerned about overflows, remainders ++ and resolution in the algorithm. These are documented in the code ++ below. ++ ++ References: ++ ++ [RFC2859] W. Fang, N. Seddigh and B. Nandy, "A Time Sliding Window ++ Three Colour Marker (TSWTCM)", RFC 2859, June 2000. ++ ++ [TON98] D.D. Clark, W. Fang, "Explicit Allocation of Best Effort ++ Packet Delivery Service", IEEE/ACM Transactions on ++ Networking, August 1998, Vol 6. No. 4, pp. 362-373. ++*/ ++ ++/* There are three important limits which need to be explored: maximum ++ expressable rate, minimum expressable rate, minimum packet size to ++ be countable. ++ ++ Maximum expressable rate depends on the size of the window and the ++ scale we have chosen. It is approximately 2^32 / window / ++ scale. For example with a window of 3 seconds and a scale of 100, ++ the maximum rate is 14 megabytes per second, eg. 115Mbit/s. 
++ ++ Minimum expressable rate depends on scale and the HZ on the ++ architecture. It is HZ / scale. For example on most platforms where ++ HZ is now 1000, this is 10 bytes per second, eg. 0.08kbit/s. ++ ++ Minimum packet size to be countable depends on the window size, ++ scale and HZ. This is basically the smallest packet that when ++ arriving immediately after the previous packet can cause the ++ average rate to rise from zero to one. It is (HZ * window) / ++ scale. For example with a window of 3 seconds, a scale of 100 and a ++ HZ of 1000, this would be 30. That is, a continuous stream of ++ packets less than 30 bytes long would not be able to raise the rate ++ above zero. ++ ++ These limitations are a simple consequence of the current ++ implementation using integer arithmetic. */ ++ ++/* Maximum number of tokens in total that we can have in a window is ++ limited by the range of the u_int32_t datatype. We prevent the ++ overflow of this by first calculating the maximum amount of tokens ++ a single packet can add and subtracting that from the maximum ++ value the window can get. */ ++#define MAX_PACKET_IN_TOKENS (0x0000ffff * IP_CONNTRACK_RATE_SCALE) ++#define MAX_TOKENS_IN_WINDOW (0xffffffff - MAX_PACKET_IN_TOKENS) ++ ++/* Synchronizes all accesses to ip_conntrack_rate structures. 
*/
++static DEFINE_RWLOCK(rate_lock);
++
++/* Count a packet of 'len' bytes into the rate estimate held in 'ctr'.
++
++   This is the time sliding window update from [RFC2859]:
++
++     tokens  = avgrate * IP_CONNTRACK_RATE_INTERVAL
++               + len * IP_CONNTRACK_RATE_SCALE
++     avgrate = tokens / (elapsed + IP_CONNTRACK_RATE_INTERVAL)
++
++   where 'elapsed' is the number of jiffies since the previously
++   counted packet.  ctr->avgrate and ctr->prev are read and written
++   with rate_lock held for writing.
++
++   A packet whose timestamp is not after ctr->prev is counted with
++   zero elapsed time instead of being left out of the estimate. */
++void
++ip_conntrack_rate_count(struct ip_conntrack_rate *ctr, unsigned int len)
++{
++	u_int32_t tokens;
++	unsigned long now, elapsed;
++
++	/* MAX_TOKENS_IN_WINDOW leaves room for exactly one packet worth
++	   MAX_PACKET_IN_TOKENS.  Bound 'len' accordingly so that the sum
++	   below can not wrap around even for an oversized length. */
++	if (len > MAX_PACKET_IN_TOKENS / IP_CONNTRACK_RATE_SCALE)
++		len = MAX_PACKET_IN_TOKENS / IP_CONNTRACK_RATE_SCALE;
++
++	write_lock_bh(&rate_lock);
++
++	/* Sample jiffies with the lock held so that another caller of
++	   this function can not have stored a later timestamp in
++	   ctr->prev than the one we are about to use. */
++	now = jiffies;
++
++	/* time_after() compares modulo the width of the counter, so a
++	   jiffies wrap between two packets is an ordinary interval.
++	   When it holds, the difference is at most half the counter
++	   range, hence adding the window length below can not wrap the
++	   divisor around to a tiny value or to zero. */
++	elapsed = time_after(now, ctr->prev) ? now - ctr->prev : 0;
++
++	tokens = ctr->avgrate * IP_CONNTRACK_RATE_INTERVAL +
++		len * IP_CONNTRACK_RATE_SCALE;
++	if (tokens > MAX_TOKENS_IN_WINDOW)
++		tokens = MAX_TOKENS_IN_WINDOW;
++
++	ctr->avgrate = tokens / (elapsed + IP_CONNTRACK_RATE_INTERVAL);
++	ctr->prev = now;
++	write_unlock_bh(&rate_lock);
++}
++
++/* Return the rate stored in 'ctr' converted from tokens per jiffy to
++   bytes per second.  The value is the estimate as of the last counted
++   packet; time that has passed since then is not taken into account.
++   ctr->avgrate is read with rate_lock held for reading. */
++u_int32_t
++ip_conntrack_rate_get(struct ip_conntrack_rate *ctr)
++{
++	u_int32_t rate;
++
++	read_lock_bh(&rate_lock);
++	/* Rate can not overflow here if IP_CONNTRACK_RATE_INTERVAL is
++	   at least HZ.  If it is not, we could change the order of
++	   calculations at the possible cost of precision. */
++	rate = ctr->avgrate * HZ / IP_CONNTRACK_RATE_SCALE;
++	read_unlock_bh(&rate_lock);
++	return rate;
++}
++
++EXPORT_SYMBOL(ip_conntrack_rate_count);
++EXPORT_SYMBOL(ip_conntrack_rate_get);
+diff -Nur --exclude '*.orig' linux.org/net/ipv4/netfilter/ipt_connrate.c linux/net/ipv4/netfilter/ipt_connrate.c
+--- linux.org/net/ipv4/netfilter/ipt_connrate.c 1970-01-01 01:00:00.000000000 +0100
++++ linux/net/ipv4/netfilter/ipt_connrate.c 2006-05-11 12:35:34.000000000 +0200
+@@ -0,0 +1,73 @@
++/* Connection transfer rate match for netfilter.
++ *
++ * Copyright (c) 2004 Nuutti Kotivuori <[EMAIL PROTECTED]>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++#include <linux/module.h>
++#include <linux/skbuff.h>
++#include <linux/netfilter_ipv4/ip_conntrack.h>
++#include <linux/netfilter_ipv4/ip_tables.h>
++#include <linux/netfilter_ipv4/ipt_connrate.h>
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Nuutti Kotivuori <[EMAIL PROTECTED]>");
++MODULE_DESCRIPTION("iptables connection transfer rate match module");
++
++/* Match callback.  Returns 1 when the transfer rate of the connection
++   the packet belongs to satisfies the range in 'matchinfo', 0 when it
++   does not or when ip_conntrack_get() finds no connection. */
++static int
++match(const struct sk_buff *skb,
++      const struct net_device *in,
++      const struct net_device *out,
++      const void *matchinfo,
++      int offset,
++      int *hotdrop)
++{
++	const struct ipt_connrate_info *range = matchinfo;
++	enum ip_conntrack_info ctinfo;
++	struct ip_conntrack *conn;
++	u_int32_t bps;
++
++	conn = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
++	if (!conn)
++		return 0;	/* no match */
++
++	bps = ip_conntrack_rate_get(&conn->rate);
++
++	/* normal range: inside [from,to], bounds included */
++	if (range->from <= range->to)
++		return (bps >= range->from && bps <= range->to);
++
++	/* inverted range: strictly below 'to' or strictly above 'from' */
++	return (bps < range->to || bps > range->from);
++}
++
++/* Checkentry callback.  Returns 1 only if the size of the match data
++   equals the aligned size of struct ipt_connrate_info, 0 otherwise. */
++static int
++check(const char *tablename,
++      const struct ipt_ip *ip,
++      void *matchinfo,
++      unsigned int matchsize,
++      unsigned int hook_mask)
++{
++	return matchsize == IPT_ALIGN(sizeof(struct ipt_connrate_info));
++}
++
++/* Registration record for the "connrate" match. */
++static struct ipt_match connrate_match = {
++	.name		= "connrate",
++	.match		= match,
++	.checkentry	= check,
++	.me		= THIS_MODULE,
++};
++
++/* Module load: register the match, passing the result through. */
++static int __init init(void)
++{
++	return ipt_register_match(&connrate_match);
++}
++
++/* Module unload: unregister the match again. */
++static void __exit fini(void)
++{
++	ipt_unregister_match(&connrate_match);
++}
++
++module_init(init);
++module_exit(fini);
================================================================
_______________________________________________
pld-cvs-commit mailing list
pld-cvs-commit@lists.pld-linux.org
http://lists.pld-linux.org/mailman/listinfo/pld-cvs-commit