Module Name: src
Committed By: ozaki-r
Date: Wed Jul 21 06:41:22 UTC 2021
Modified Files:
src/sys/altq: altq_cbq.h altq_rmclass.c altq_rmclass.h
Log Message:
altq, cbq: treat time in nanoseconds
Microsecond time granularity is sometimes too coarse to provide
accurate bandwidth control; the actual bandwidth of a capped class
can considerably exceed its limit in some cases.
Treating time in nanoseconds requires the following changes:
- Use timespec instead of timeval
- Use nanotime(9) instead of microtime(9)
- Change the type of some variables, especially the *idle ones, from int to long
To generate a diff of this commit:
cvs rdiff -u -r1.9 -r1.10 src/sys/altq/altq_cbq.h
cvs rdiff -u -r1.25 -r1.26 src/sys/altq/altq_rmclass.c
cvs rdiff -u -r1.10 -r1.11 src/sys/altq/altq_rmclass.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/altq/altq_cbq.h
diff -u src/sys/altq/altq_cbq.h:1.9 src/sys/altq/altq_cbq.h:1.10
--- src/sys/altq/altq_cbq.h:1.9 Wed Jul 21 06:33:30 2021
+++ src/sys/altq/altq_cbq.h Wed Jul 21 06:41:22 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: altq_cbq.h,v 1.9 2021/07/21 06:33:30 ozaki-r Exp $ */
+/* $NetBSD: altq_cbq.h,v 1.10 2021/07/21 06:41:22 ozaki-r Exp $ */
/* $KAME: altq_cbq.h,v 1.12 2003/10/03 05:05:15 kjc Exp $ */
/*
@@ -81,15 +81,15 @@ typedef struct _cbq_class_stats_ {
/* other static class parameters useful for debugging */
int priority;
- int maxidle;
- int minidle;
- int offtime;
+ long maxidle;
+ long minidle;
+ long offtime;
int qmax;
u_long ps_per_byte;
int wrr_allot;
int qcnt; /* # packets in queue */
- int avgidle;
+ long avgidle;
/* red and rio related info */
int qtype;
Index: src/sys/altq/altq_rmclass.c
diff -u src/sys/altq/altq_rmclass.c:1.25 src/sys/altq/altq_rmclass.c:1.26
--- src/sys/altq/altq_rmclass.c:1.25 Wed Jul 21 06:33:30 2021
+++ src/sys/altq/altq_rmclass.c Wed Jul 21 06:41:22 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: altq_rmclass.c,v 1.25 2021/07/21 06:33:30 ozaki-r Exp $ */
+/* $NetBSD: altq_rmclass.c,v 1.26 2021/07/21 06:41:22 ozaki-r Exp $ */
/* $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $ */
/*
@@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: altq_rmclass.c,v 1.25 2021/07/21 06:33:30 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: altq_rmclass.c,v 1.26 2021/07/21 06:41:22 ozaki-r Exp $");
/* #ident "@(#)rm_class.c 1.48 97/12/05 SMI" */
@@ -86,7 +86,7 @@ __KERNEL_RCSID(0, "$NetBSD: altq_rmclass
* Local routines.
*/
-static int rmc_satisfied(struct rm_class *, struct timeval *);
+static int rmc_satisfied(struct rm_class *, struct timespec *);
static void rmc_wrr_set_weights(struct rm_ifdat *);
static void rmc_depth_compute(struct rm_class *);
static void rmc_depth_recompute(rm_class_t *);
@@ -99,8 +99,8 @@ static void _rmc_dropq(rm_class_t *);
static mbuf_t *_rmc_getq(rm_class_t *);
static mbuf_t *_rmc_pollq(rm_class_t *);
-static int rmc_under_limit(struct rm_class *, struct timeval *);
-static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
+static int rmc_under_limit(struct rm_class *, struct timespec *);
+static void rmc_tl_satisfied(struct rm_ifdat *, struct timespec *);
static void rmc_drop_action(struct rm_class *);
static void rmc_restart(struct rm_class *);
static void rmc_root_overlimit(struct rm_class *, struct rm_class *);
@@ -253,18 +253,18 @@ rmc_newclass(int pri, struct rm_ifdat *i
cl->flags_ = flags;
#if 1 /* minidle is also scaled in ALTQ */
- cl->minidle_ = (minidle * (int)PSEC_TO_NSEC(psecPerByte)) / 8;
+ cl->minidle_ = ((long)minidle * (long)psecPerByte) / 8;
if (cl->minidle_ > 0)
cl->minidle_ = 0;
#else
cl->minidle_ = minidle;
#endif
- cl->maxidle_ = (maxidle * PSEC_TO_NSEC(psecPerByte)) / 8;
+ cl->maxidle_ = ((long)maxidle * (long)psecPerByte) / 8;
if (cl->maxidle_ == 0)
cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
cl->avgidle_ = cl->maxidle_;
- cl->offtime_ = ((offtime * PSEC_TO_NSEC(psecPerByte)) / 8) >> RM_FILTER_GAIN;
+ cl->offtime_ = (((long)offtime * (long)psecPerByte) / 8) >> RM_FILTER_GAIN;
if (cl->offtime_ == 0)
cl->offtime_ = 1;
#else
@@ -365,18 +365,18 @@ rmc_modclass(struct rm_class *cl, u_long
qlimit(cl->q_) = maxq;
#if 1 /* minidle is also scaled in ALTQ */
- cl->minidle_ = (minidle * PSEC_TO_NSEC(psecPerByte)) / 8;
+ cl->minidle_ = ((long)minidle * (long)psecPerByte) / 8;
if (cl->minidle_ > 0)
cl->minidle_ = 0;
#else
cl->minidle_ = minidle;
#endif
- cl->maxidle_ = (maxidle * PSEC_TO_NSEC(psecPerByte)) / 8;
+ cl->maxidle_ = ((long)maxidle * (long)psecPerByte) / 8;
if (cl->maxidle_ == 0)
cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
cl->avgidle_ = cl->maxidle_;
- cl->offtime_ = ((offtime * PSEC_TO_NSEC(psecPerByte)) / 8) >> RM_FILTER_GAIN;
+ cl->offtime_ = (((long)offtime * (long)psecPerByte) / 8) >> RM_FILTER_GAIN;
if (cl->offtime_ == 0)
cl->offtime_ = 1;
#else
@@ -748,14 +748,14 @@ rmc_init(struct ifaltq *ifq, struct rm_i
int
rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
{
- struct timeval now;
+ struct timespec now;
struct rm_ifdat *ifd = cl->ifdat_;
int cpri = cl->pri_;
int is_empty = qempty(cl->q_);
RM_GETTIME(now);
if (ifd->cutoff_ > 0) {
- if (TV_LT(&cl->undertime_, &now)) {
+ if (TS_LT(&cl->undertime_, &now)) {
if (ifd->cutoff_ > cl->depth_)
ifd->cutoff_ = cl->depth_;
CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
@@ -771,7 +771,7 @@ rmc_queue_packet(struct rm_class *cl, mb
while (borrow != NULL &&
borrow->depth_ < ifd->cutoff_) {
- if (TV_LT(&borrow->undertime_, &now)) {
+ if (TS_LT(&borrow->undertime_, &now)) {
ifd->cutoff_ = borrow->depth_;
CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
break;
@@ -781,7 +781,7 @@ rmc_queue_packet(struct rm_class *cl, mb
}
#else /* !ALTQ */
else if ((ifd->cutoff_ > 1) && cl->borrow_) {
- if (TV_LT(&cl->borrow_->undertime_, &now)) {
+ if (TS_LT(&cl->borrow_->undertime_, &now)) {
ifd->cutoff_ = cl->borrow_->depth_;
CBQTRACE(rmc_queue_packet, 'ffob',
cl->borrow_->depth_);
@@ -809,12 +809,12 @@ rmc_queue_packet(struct rm_class *cl, mb
/*
* void
- * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
+ * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timespec *now) - Check all
* classes to see if there are satified.
*/
static void
-rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
+rmc_tl_satisfied(struct rm_ifdat *ifd, struct timespec *now)
{
int i;
rm_class_t *p, *bp;
@@ -840,13 +840,13 @@ rmc_tl_satisfied(struct rm_ifdat *ifd, s
*/
static int
-rmc_satisfied(struct rm_class *cl, struct timeval *now)
+rmc_satisfied(struct rm_class *cl, struct timespec *now)
{
rm_class_t *p;
if (cl == NULL)
return (1);
- if (TV_LT(now, &cl->undertime_))
+ if (TS_LT(now, &cl->undertime_))
return (1);
if (cl->depth_ == 0) {
if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
@@ -873,7 +873,7 @@ rmc_satisfied(struct rm_class *cl, struc
*/
static int
-rmc_under_limit(struct rm_class *cl, struct timeval *now)
+rmc_under_limit(struct rm_class *cl, struct timespec *now)
{
rm_class_t *p = cl;
rm_class_t *top;
@@ -888,7 +888,7 @@ rmc_under_limit(struct rm_class *cl, str
return (1);
if (cl->sleeping_) {
- if (TV_LT(now, &cl->undertime_))
+ if (TS_LT(now, &cl->undertime_))
return (0);
CALLOUT_STOP(&cl->callout_);
@@ -898,7 +898,7 @@ rmc_under_limit(struct rm_class *cl, str
}
top = NULL;
- while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
+ while (cl->undertime_.tv_sec && TS_LT(now, &cl->undertime_)) {
if (((cl = cl->borrow_) == NULL) ||
(cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
@@ -961,7 +961,7 @@ _rmc_wrr_dequeue_next(struct rm_ifdat *i
u_int deficit;
int cpri;
mbuf_t *m;
- struct timeval now;
+ struct timespec now;
RM_GETTIME(now);
@@ -1116,7 +1116,7 @@ _rmc_prr_dequeue_next(struct rm_ifdat *i
mbuf_t *m;
int cpri;
struct rm_class *cl, *first = NULL;
- struct timeval now;
+ struct timespec now;
RM_GETTIME(now);
@@ -1212,7 +1212,7 @@ _rmc_prr_dequeue_next(struct rm_ifdat *i
/*
* mbuf_t *
- * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
+ * rmc_dequeue_next(struct rm_ifdat *ifd, struct timespec *now) - this function
* is invoked by the packet driver to get the next packet to be
* dequeued and output on the link. If WRR is enabled, then the
* WRR dequeue next routine will determine the next packet to sent.
@@ -1253,12 +1253,12 @@ rmc_dequeue_next(struct rm_ifdat *ifd, i
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
- int idle, avgidle, pktlen;
- u_long pkt_time;
- int tidle;
+ long idle, avgidle, pktlen;
+ long pkt_time;
+ long tidle;
rm_class_t *cl, *cl0, *borrowed;
rm_class_t *borrows;
- struct timeval *nowp;
+ struct timespec *nowp;
/*
* Get the most recent completed class.
@@ -1267,7 +1267,7 @@ rmc_update_class_util(struct rm_ifdat *i
return;
cl0 = cl;
- pktlen = ifd->curlen_[ifd->qo_];
+ pktlen = (long)ifd->curlen_[ifd->qo_];
borrowed = ifd->borrowed_[ifd->qo_];
borrows = borrowed;
@@ -1286,13 +1286,13 @@ rmc_update_class_util(struct rm_ifdat *i
nowp = &ifd->now_[ifd->qo_];
/* get pkt_time (for link) in usec */
#if 1 /* use approximation */
- pkt_time = (u_long)ifd->curlen_[ifd->qo_] * ifd->ps_per_byte_;
- pkt_time = PSEC_TO_USEC(pkt_time);
+ pkt_time = (long)ifd->curlen_[ifd->qo_] * (long)ifd->ps_per_byte_;
+ pkt_time = PSEC_TO_NSEC(pkt_time);
#else
pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1 /* ALTQ4PPP */
- if (TV_LT(nowp, &ifd->ifnow_)) {
+ if (TS_LT(nowp, &ifd->ifnow_)) {
int iftime;
/*
@@ -1301,36 +1301,36 @@ rmc_update_class_util(struct rm_ifdat *i
* data compression or the interface speed is set to
* a much lower value.
*/
- TV_DELTA(&ifd->ifnow_, nowp, iftime);
+ TS_DELTA(&ifd->ifnow_, nowp, iftime);
if (iftime+pkt_time < ifd->maxiftime_) {
- TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
+ TS_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
} else {
- TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
+ TS_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
}
} else {
- TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
+ TS_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
}
#else
- if (TV_LT(nowp, &ifd->ifnow_)) {
- TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
+ if (TS_LT(nowp, &ifd->ifnow_)) {
+ TS_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
} else {
- TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
+ TS_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
}
#endif
while (cl != NULL) {
- TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
- if (idle >= 2000000)
+ TS_DELTA(&ifd->ifnow_, &cl->last_, idle);
+ if (idle >= 2000000000)
/*
* this class is idle enough, reset avgidle.
- * (TV_DELTA returns 2000000 us when delta is large.)
+ * (TS_DELTA returns 2000000000 ns when delta is large.)
*/
cl->avgidle_ = cl->maxidle_;
/* get pkt_time (for class) in usec */
#if 1 /* use approximation */
- pkt_time = (u_long)pktlen * cl->ps_per_byte_;
- pkt_time = PSEC_TO_USEC(pkt_time);
+ pkt_time = pktlen * (long)cl->ps_per_byte_;
+ pkt_time = PSEC_TO_NSEC(pkt_time);
#else
pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
@@ -1354,7 +1354,7 @@ rmc_update_class_util(struct rm_ifdat *i
/* set next idle to make avgidle 0 */
tidle = pkt_time +
(((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
- TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
+ TS_ADD_DELTA(nowp, tidle, &cl->undertime_);
++cl->stats_.over;
} else {
cl->avgidle_ =
@@ -1391,7 +1391,7 @@ rmc_update_class_util(struct rm_ifdat *i
cl = ifd->class_[ifd->qo_];
if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
#if 1 /* ALTQ */
- if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
+ if ((qlen(cl->q_) <= 0) || TS_LT(nowp, &borrowed->undertime_)) {
rmc_tl_satisfied(ifd, nowp);
CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
} else {
@@ -1399,7 +1399,7 @@ rmc_update_class_util(struct rm_ifdat *i
CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
}
#else /* !ALTQ */
- if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
+ if ((qlen(cl->q_) <= 1) || TS_LT(&now, &borrowed->undertime_)) {
reset_cutoff(ifd);
#ifdef notdef
rmc_tl_satisfied(ifd, &now);
@@ -1484,13 +1484,14 @@ tvhzto(struct timeval *tv)
void
rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
{
- int ndelay, t, extradelay;
+ int t;
+ long ndelay, extradelay;
cl->stats_.overactions++;
if (borrow != NULL)
- TV_DELTA(&borrow->undertime_, &cl->overtime_, ndelay);
+ TS_DELTA(&borrow->undertime_, &cl->overtime_, ndelay);
else
- TV_DELTA(&cl->undertime_, &cl->overtime_, ndelay);
+ TS_DELTA(&cl->undertime_, &cl->overtime_, ndelay);
#ifndef BORROW_OFFTIME
ndelay += cl->offtime_;
#endif
@@ -1516,7 +1517,7 @@ rmc_delay_action(struct rm_class *cl, st
extradelay -= cl->last_pkttime_;
#endif
if (extradelay > 0) {
- TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
+ TS_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
ndelay += extradelay;
}
@@ -1530,13 +1531,13 @@ rmc_delay_action(struct rm_class *cl, st
* NOTE: If there's no other traffic, we need the timer as
* a 'backstop' to restart this class.
*/
- if (ndelay > tick * 2) {
+ if (NSEC_TO_USEC(ndelay) > tick * 2) {
#ifdef __FreeBSD__
/* FreeBSD rounds up the tick */
t = tvhzto(&cl->undertime_);
#else
/* other BSDs round down the tick */
- t = tvhzto(&cl->undertime_) + 1;
+ t = tshzto(&cl->undertime_) + 1;
#endif
} else
t = 2;
Index: src/sys/altq/altq_rmclass.h
diff -u src/sys/altq/altq_rmclass.h:1.10 src/sys/altq/altq_rmclass.h:1.11
--- src/sys/altq/altq_rmclass.h:1.10 Wed Jul 21 06:33:30 2021
+++ src/sys/altq/altq_rmclass.h Wed Jul 21 06:41:22 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: altq_rmclass.h,v 1.10 2021/07/21 06:33:30 ozaki-r Exp $ */
+/* $NetBSD: altq_rmclass.h,v 1.11 2021/07/21 06:41:22 ozaki-r Exp $ */
/* $KAME: altq_rmclass.h,v 1.10 2003/08/20 23:30:23 itojun Exp $ */
/*
@@ -55,31 +55,15 @@ typedef struct rm_class rm_class_t;
struct red;
-/*
- * Macros for dealing with time values. We assume all times are
- * 'timevals'. `microtime' is used to get the best available clock
- * resolution. If `microtime' *doesn't* return a value that's about
- * ten times smaller than the average packet time on the fastest
- * link that will use these routines, a slightly different clock
- * scheme than this one should be used.
- * (Bias due to truncation error in this scheme will overestimate utilization
- * and discriminate against high bandwidth classes. To remove this bias an
- * integrator needs to be added. The simplest integrator uses a history of
- * 10 * avg.packet.time / min.tick.time packet completion entries. This is
- * straight forward to add but we don't want to pay the extra memory
- * traffic to maintain it if it's not necessary (occasionally a vendor
- * accidentally builds a workstation with a decent clock - e.g., Sun & HP).)
- */
-
-#define RM_GETTIME(now) microtime(&now)
+#define RM_GETTIME(now) nanotime(&now)
-#define TV_LT(a, b) (((a)->tv_sec < (b)->tv_sec) || \
- (((a)->tv_usec < (b)->tv_usec) && ((a)->tv_sec <= (b)->tv_sec)))
+#define TS_LT(a, b) (((a)->tv_sec < (b)->tv_sec) || \
+ (((a)->tv_nsec < (b)->tv_nsec) && ((a)->tv_sec <= (b)->tv_sec)))
-#define TV_DELTA(a, b, delta) do { \
- register int xxs; \
+#define TS_DELTA(a, b, delta) do { \
+ register int64_t xxs; \
\
- delta = (a)->tv_usec - (b)->tv_usec; \
+ delta = (int64_t)((a)->tv_nsec - (b)->tv_nsec); \
if ((xxs = (a)->tv_sec - (b)->tv_sec)) { \
switch (xxs) { \
default: \
@@ -88,24 +72,24 @@ struct red;
delta = 0; \
/* fall through */ \
case 2: \
- delta += 1000000; \
+ delta += 1000000000; \
/* fall through */ \
case 1: \
- delta += 1000000; \
+ delta += 1000000000; \
break; \
} \
} \
} while (0)
-#define TV_ADD_DELTA(a, delta, res) do { \
- register int xxus = (a)->tv_usec + (delta); \
+#define TS_ADD_DELTA(a, delta, res) do { \
+ register long xxns = (a)->tv_nsec + (long)(delta); \
\
(res)->tv_sec = (a)->tv_sec; \
- while (xxus >= 1000000) { \
+ while (xxns >= 1000000000) { \
++((res)->tv_sec); \
- xxus -= 1000000; \
+ xxns -= 1000000000; \
} \
- (res)->tv_usec = xxus; \
+ (res)->tv_nsec = xxns; \
} while (0)
#define RM_TIMEOUT 2 /* 1 Clock tick. */
@@ -148,10 +132,10 @@ struct rm_class {
u_int w_allotment_; /* Weighted allotment for WRR */
int bytes_alloc_; /* Allocation for round of WRR */
- int avgidle_;
- int maxidle_;
- int minidle_;
- int offtime_;
+ long avgidle_;
+ long maxidle_;
+ long minidle_;
+ long offtime_;
int sleeping_; /* != 0 if delaying */
int qthresh_; /* Queue threshold for formal link sharing */
int leaf_; /* Note whether leaf class or not.*/
@@ -170,10 +154,10 @@ struct rm_class {
struct altq_pktattr *pktattr_; /* saved hdr used by RED/ECN */
int flags_;
- int last_pkttime_; /* saved pkt_time */
- struct timeval undertime_; /* time can next send */
- struct timeval last_; /* time last packet sent */
- struct timeval overtime_;
+ long last_pkttime_; /* saved pkt_time */
+ struct timespec undertime_; /* time can next send */
+ struct timespec last_; /* time last packet sent */
+ struct timespec overtime_;
struct callout callout_; /* for timeout() calls */
rm_class_stats_t stats_; /* Class Statistics */
@@ -216,12 +200,12 @@ struct rm_ifdat {
rm_class_t *borrowed_[RM_MAXQUEUED]; /* Class borrowed last */
rm_class_t *class_[RM_MAXQUEUED]; /* class sending */
int curlen_[RM_MAXQUEUED]; /* Current pktlen */
- struct timeval now_[RM_MAXQUEUED]; /* Current packet time. */
+ struct timespec now_[RM_MAXQUEUED]; /* Current packet time. */
int is_overlimit_[RM_MAXQUEUED];/* Current packet time. */
int cutoff_; /* Cut-off depth for borrowing */
- struct timeval ifnow_; /* expected xmit completion time */
+ struct timespec ifnow_; /* expected xmit completion time */
#if 1 /* ALTQ4PPP */
int maxiftime_; /* max delay inside interface */
#endif