This patch changes HTB's class storage from hash+lists to a two-level linear
array, so it can do constant time (O(1)) class lookup by classid. It improves
scalability for large number of classes.
Without the patch, ~14k htb classes can starve a Xeon-3.2 at only 15kpps,
using most of it's cycles traversing lists in htb_find(). The patch
eliminates this problem, and has a measurable impact even with a few hundred
classes.
Previously, scalability could be improved by increasing HTB_HSIZE, modify the
hash function, and recompile, but this patch works for everyone without
recompile and scales better too.
The patch is for 2.6.20-rc6, I have older ones for 2.6.18 and 2.6.19 if anyone
is interested.
Signed-off-by: Simon Lodal <[EMAIL PROTECTED]>
--- linux-2.6.20-rc6.base/net/sched/sch_htb.c 2007-01-25 03:19:28.0 +0100
+++ linux-2.6.20-rc6/net/sched/sch_htb.c 2007-02-01 05:44:36.0 +0100
@@ -68,16 +68,37 @@
one less than their parent.
*/
-#define HTB_HSIZE 16 /* classid hash size */
-#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */
-#define HTB_RATECM 1 /* whether to use rate computer */
+#define HTB_MAX_CLS (TC_H_MIN(-1)+1)
+#define HTB_CLS_ARRAY_SIZE PAGE_SIZE
+#define HTB_CLS_PER_ARRAY (HTB_CLS_ARRAY_SIZE / sizeof(void*))
+#define HTB_CLS_ARRAYS (HTB_MAX_CLS / HTB_CLS_PER_ARRAY)
+#define HTB_CLS_ARRAY(CID) (CID / HTB_CLS_PER_ARRAY)
+#define HTB_CLS_INDEX(CID) (CID & (HTB_CLS_PER_ARRAY-1))
+
+
#define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */
-#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */
+#define HTB_VER 0x30012 /* major must be matched with number suplied by TC as version */
#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif
+/* Whether to use rate computer. This is only used for statistics output to
+ userspace (tc -s class show dev ...); if you do not care about that and
+ want the last bit of performance, disable this. */
+#define HTB_RATECM
+#ifdef HTB_RATECM
+/* Time between rate computation updates, in seconds, for each class. */
+#define HTB_RATECM_UPDATE_INTERVAL 16
+/* How many HTB_RATECM_UPDATE_INTERVAL intervals to average over when
+ computing the rate. If set to 1, the computed rate will be exactly the
+ observed rate of the last interval. If set to higher values, the computed
+ rate will converge slower, but also vary less with small, temporary changes
+ in traffic.
+*/
+#define HTB_RATECM_AVERAGE_INTERVALS 2
+#endif
+
/* used internaly to keep status of single class */
enum htb_cmode {
HTB_CANT_SEND, /* class can't send and can't borrow */
@@ -104,7 +125,7 @@
/* topology */
int level; /* our level (see above) */
struct htb_class *parent; /* parent class */
- struct hlist_node hlist; /* classid hash list item */
+ struct list_head clist; /* classid list item */
struct list_head sibling; /* sibling list item */
struct list_head children; /* children list */
@@ -165,9 +186,13 @@
return rate->data[slot];
}
+typedef struct htb_class* htb_cls_array[HTB_CLS_PER_ARRAY];
+
struct htb_sched {
- struct list_head root; /* root classes list */
- struct hlist_head hash[HTB_HSIZE]; /* hashed by classid */
+ struct list_head root; /* root classes list */
+ struct list_head clist; /* all classes list */
+ /* all classes arrays for fast lookup */
+ htb_cls_array* classes[HTB_CLS_ARRAYS];
struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
/* self list - roots of self generating tree */
@@ -198,8 +223,10 @@
psched_time_t now; /* cached dequeue time */
struct timer_list timer; /* send delay timer */
#ifdef HTB_RATECM
- struct timer_list rttim; /* rate computer timer */
- int recmp_bucket; /* which hash bucket to recompute next */
+ struct timer_list rttim;/* rate computer timer */
+ int clscnt; /* total number of classes */
+ struct list_head *rtcur;/* current class to update rate timer for */
+ int rtiter; /* current iteration (1..UPDATE_INTERVAL) */
#endif
/* non shaped skbs; let them go directly thru */
@@ -209,32 +236,22 @@
long direct_pkts;
};
-/* compute hash of size HTB_HSIZE for given handle */
-static inline int htb_hash(u32 h)
-{
-#if HTB_HSIZE != 16
-#error "Declare new hash for your HTB_HSIZE"
-#endif
- h ^= h >> 8; /* stolen from cbq_hash */
- h ^= h >> 4;
- return h & 0xf;
-}
-
-/* find class in global hash table using given handle */
+/* find class in class arrays using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
struct htb_sched *q = qdisc_priv(sch);
- struct hlist_node *p;
- struct htb_class *cl;
+ htb_cls_array* a;
+ int minorid;
if (TC_H_MAJ(handle) != sch->handle)
return NULL;
- hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) {
- if (cl->classid == handle)
- return cl;
- }
- return NULL;
+ minorid = TC_H_MIN(handle);
+ a = q->classes[HTB_CLS_ARRAY(minorid)];
+ if (a ==