Module Name: src Committed By: ozaki-r Date: Wed Jul 21 06:41:22 UTC 2021
Modified Files: src/sys/altq: altq_cbq.h altq_rmclass.c altq_rmclass.h Log Message: altq, cbq: treat time in nanoseconds Time granularity in microseconds is sometimes not enough to provide accurate bandwidth control; actual bandwidth on a capped class can exceed its limit considerably in some cases. Treating time in nanoseconds requires the following changes: - Use timespec instead of timeval - Use nanotime(9) instead of microtime(9) - Change the type of some variables, especially *idle, to long To generate a diff of this commit: cvs rdiff -u -r1.9 -r1.10 src/sys/altq/altq_cbq.h cvs rdiff -u -r1.25 -r1.26 src/sys/altq/altq_rmclass.c cvs rdiff -u -r1.10 -r1.11 src/sys/altq/altq_rmclass.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/altq/altq_cbq.h diff -u src/sys/altq/altq_cbq.h:1.9 src/sys/altq/altq_cbq.h:1.10 --- src/sys/altq/altq_cbq.h:1.9 Wed Jul 21 06:33:30 2021 +++ src/sys/altq/altq_cbq.h Wed Jul 21 06:41:22 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: altq_cbq.h,v 1.9 2021/07/21 06:33:30 ozaki-r Exp $ */ +/* $NetBSD: altq_cbq.h,v 1.10 2021/07/21 06:41:22 ozaki-r Exp $ */ /* $KAME: altq_cbq.h,v 1.12 2003/10/03 05:05:15 kjc Exp $ */ /* @@ -81,15 +81,15 @@ typedef struct _cbq_class_stats_ { /* other static class parameters useful for debugging */ int priority; - int maxidle; - int minidle; - int offtime; + long maxidle; + long minidle; + long offtime; int qmax; u_long ps_per_byte; int wrr_allot; int qcnt; /* # packets in queue */ - int avgidle; + long avgidle; /* red and rio related info */ int qtype; Index: src/sys/altq/altq_rmclass.c diff -u src/sys/altq/altq_rmclass.c:1.25 src/sys/altq/altq_rmclass.c:1.26 --- src/sys/altq/altq_rmclass.c:1.25 Wed Jul 21 06:33:30 2021 +++ src/sys/altq/altq_rmclass.c Wed Jul 21 06:41:22 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: altq_rmclass.c,v 1.25 2021/07/21 06:33:30 ozaki-r Exp $ */ +/* $NetBSD: altq_rmclass.c,v 1.26 2021/07/21 06:41:22 ozaki-r Exp $ */ /* $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $ */ /* @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: altq_rmclass.c,v 1.25 2021/07/21 06:33:30 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: altq_rmclass.c,v 1.26 2021/07/21 06:41:22 ozaki-r Exp $"); /* #ident "@(#)rm_class.c 1.48 97/12/05 SMI" */ @@ -86,7 +86,7 @@ __KERNEL_RCSID(0, "$NetBSD: altq_rmclass * Local routines. */ -static int rmc_satisfied(struct rm_class *, struct timeval *); +static int rmc_satisfied(struct rm_class *, struct timespec *); static void rmc_wrr_set_weights(struct rm_ifdat *); static void rmc_depth_compute(struct rm_class *); static void rmc_depth_recompute(rm_class_t *); @@ -99,8 +99,8 @@ static void _rmc_dropq(rm_class_t *); static mbuf_t *_rmc_getq(rm_class_t *); static mbuf_t *_rmc_pollq(rm_class_t *); -static int rmc_under_limit(struct rm_class *, struct timeval *); -static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *); +static int rmc_under_limit(struct rm_class *, struct timespec *); +static void rmc_tl_satisfied(struct rm_ifdat *, struct timespec *); static void rmc_drop_action(struct rm_class *); static void rmc_restart(struct rm_class *); static void rmc_root_overlimit(struct rm_class *, struct rm_class *); @@ -253,18 +253,18 @@ rmc_newclass(int pri, struct rm_ifdat *i cl->flags_ = flags; #if 1 /* minidle is also scaled in ALTQ */ - cl->minidle_ = (minidle * (int)PSEC_TO_NSEC(psecPerByte)) / 8; + cl->minidle_ = ((long)minidle * (long)psecPerByte) / 8; if (cl->minidle_ > 0) cl->minidle_ = 0; #else cl->minidle_ = minidle; #endif - cl->maxidle_ = (maxidle * PSEC_TO_NSEC(psecPerByte)) / 8; + cl->maxidle_ = ((long)maxidle * (long)psecPerByte) / 8; if (cl->maxidle_ == 0) cl->maxidle_ = 1; #if 1 /* offtime is also scaled in ALTQ */ cl->avgidle_ = cl->maxidle_; - cl->offtime_ = ((offtime * PSEC_TO_NSEC(psecPerByte)) / 8) >> RM_FILTER_GAIN; + cl->offtime_ = (((long)offtime * (long)psecPerByte) / 8) >> RM_FILTER_GAIN; if (cl->offtime_ == 0) cl->offtime_ = 1; #else @@ -365,18 +365,18 @@ rmc_modclass(struct rm_class *cl, u_long qlimit(cl->q_) = maxq; #if 1 /* minidle is also scaled in ALTQ */ - cl->minidle_ = (minidle * PSEC_TO_NSEC(psecPerByte)) / 8; + cl->minidle_ = ((long)minidle * (long)psecPerByte) / 8; if (cl->minidle_ > 0) cl->minidle_ = 0; #else cl->minidle_ = minidle; #endif - cl->maxidle_ = (maxidle * PSEC_TO_NSEC(psecPerByte)) / 8; + cl->maxidle_ = ((long)maxidle * (long)psecPerByte) / 8; if (cl->maxidle_ == 0) cl->maxidle_ = 1; #if 1 /* offtime is also scaled in ALTQ */ cl->avgidle_ = cl->maxidle_; - cl->offtime_ = ((offtime * PSEC_TO_NSEC(psecPerByte)) / 8) >> RM_FILTER_GAIN; + cl->offtime_ = (((long)offtime * (long)psecPerByte) / 8) >> RM_FILTER_GAIN; if (cl->offtime_ == 0) cl->offtime_ = 1; #else @@ -748,14 +748,14 @@ rmc_init(struct ifaltq *ifq, struct rm_i int rmc_queue_packet(struct rm_class *cl, mbuf_t *m) { - struct timeval now; + struct timespec now; struct rm_ifdat *ifd = cl->ifdat_; int cpri = cl->pri_; int is_empty = qempty(cl->q_); RM_GETTIME(now); if (ifd->cutoff_ > 0) { - if (TV_LT(&cl->undertime_, &now)) { + if (TS_LT(&cl->undertime_, &now)) { if (ifd->cutoff_ > cl->depth_) ifd->cutoff_ = cl->depth_; CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_); @@ -771,7 +771,7 @@ rmc_queue_packet(struct rm_class *cl, mb while (borrow != NULL && borrow->depth_ < ifd->cutoff_) { - if (TV_LT(&borrow->undertime_, &now)) { + if (TS_LT(&borrow->undertime_, &now)) { ifd->cutoff_ = borrow->depth_; CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_); break; @@ -781,7 +781,7 @@ rmc_queue_packet(struct rm_class *cl, mb } #else /* !ALTQ */ else if ((ifd->cutoff_ > 1) && cl->borrow_) { - if (TV_LT(&cl->borrow_->undertime_, &now)) { + if (TS_LT(&cl->borrow_->undertime_, &now)) { ifd->cutoff_ = cl->borrow_->depth_; CBQTRACE(rmc_queue_packet, 'ffob', cl->borrow_->depth_); @@ -809,12 +809,12 @@ rmc_queue_packet(struct rm_class *cl, mb /* * void - * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all + * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timespec *now) - Check all * classes to see if there are satified. */ static void -rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) +rmc_tl_satisfied(struct rm_ifdat *ifd, struct timespec *now) { int i; rm_class_t *p, *bp; @@ -840,13 +840,13 @@ rmc_tl_satisfied(struct rm_ifdat *ifd, s */ static int -rmc_satisfied(struct rm_class *cl, struct timeval *now) +rmc_satisfied(struct rm_class *cl, struct timespec *now) { rm_class_t *p; if (cl == NULL) return (1); - if (TV_LT(now, &cl->undertime_)) + if (TS_LT(now, &cl->undertime_)) return (1); if (cl->depth_ == 0) { if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_)) @@ -873,7 +873,7 @@ rmc_satisfied(struct rm_class *cl, struc */ static int -rmc_under_limit(struct rm_class *cl, struct timeval *now) +rmc_under_limit(struct rm_class *cl, struct timespec *now) { rm_class_t *p = cl; rm_class_t *top; @@ -888,7 +888,7 @@ rmc_under_limit(struct rm_class *cl, str return (1); if (cl->sleeping_) { - if (TV_LT(now, &cl->undertime_)) + if (TS_LT(now, &cl->undertime_)) return (0); CALLOUT_STOP(&cl->callout_); @@ -898,7 +898,7 @@ rmc_under_limit(struct rm_class *cl, str } top = NULL; - while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) { + while (cl->undertime_.tv_sec && TS_LT(now, &cl->undertime_)) { if (((cl = cl->borrow_) == NULL) || (cl->depth_ > ifd->cutoff_)) { #ifdef ADJUST_CUTOFF @@ -961,7 +961,7 @@ _rmc_wrr_dequeue_next(struct rm_ifdat *i u_int deficit; int cpri; mbuf_t *m; - struct timeval now; + struct timespec now; RM_GETTIME(now); @@ -1116,7 +1116,7 @@ _rmc_prr_dequeue_next(struct rm_ifdat *i mbuf_t *m; int cpri; struct rm_class *cl, *first = NULL; - struct timeval now; + struct timespec now; RM_GETTIME(now); @@ -1212,7 +1212,7 @@ _rmc_prr_dequeue_next(struct rm_ifdat *i /* * mbuf_t * - * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function + * rmc_dequeue_next(struct rm_ifdat *ifd, struct timespec *now) - this function * is invoked by the packet driver to get the next packet to be * dequeued and output on the link. If WRR is enabled, then the * WRR dequeue next routine will determine the next packet to sent. @@ -1253,12 +1253,12 @@ rmc_dequeue_next(struct rm_ifdat *ifd, i void rmc_update_class_util(struct rm_ifdat *ifd) { - int idle, avgidle, pktlen; - u_long pkt_time; - int tidle; + long idle, avgidle, pktlen; + long pkt_time; + long tidle; rm_class_t *cl, *cl0, *borrowed; rm_class_t *borrows; - struct timeval *nowp; + struct timespec *nowp; /* * Get the most recent completed class. @@ -1267,7 +1267,7 @@ rmc_update_class_util(struct rm_ifdat *i return; cl0 = cl; - pktlen = ifd->curlen_[ifd->qo_]; + pktlen = (long)ifd->curlen_[ifd->qo_]; borrowed = ifd->borrowed_[ifd->qo_]; borrows = borrowed; @@ -1286,13 +1286,13 @@ rmc_update_class_util(struct rm_ifdat *i nowp = &ifd->now_[ifd->qo_]; /* get pkt_time (for link) in usec */ #if 1 /* use approximation */ - pkt_time = (u_long)ifd->curlen_[ifd->qo_] * ifd->ps_per_byte_; - pkt_time = PSEC_TO_USEC(pkt_time); + pkt_time = (long)ifd->curlen_[ifd->qo_] * (long)ifd->ps_per_byte_; + pkt_time = PSEC_TO_NSEC(pkt_time); #else pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000; #endif #if 1 /* ALTQ4PPP */ - if (TV_LT(nowp, &ifd->ifnow_)) { + if (TS_LT(nowp, &ifd->ifnow_)) { int iftime; /* @@ -1301,36 +1301,36 @@ rmc_update_class_util(struct rm_ifdat *i * data compression or the interface speed is set to * a much lower value. */ - TV_DELTA(&ifd->ifnow_, nowp, iftime); + TS_DELTA(&ifd->ifnow_, nowp, iftime); if (iftime+pkt_time < ifd->maxiftime_) { - TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_); + TS_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_); } else { - TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_); + TS_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_); } } else { - TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_); + TS_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_); } #else - if (TV_LT(nowp, &ifd->ifnow_)) { - TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_); + if (TS_LT(nowp, &ifd->ifnow_)) { + TS_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_); } else { - TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_); + TS_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_); } #endif while (cl != NULL) { - TV_DELTA(&ifd->ifnow_, &cl->last_, idle); - if (idle >= 2000000) + TS_DELTA(&ifd->ifnow_, &cl->last_, idle); + if (idle >= 2000000000) /* * this class is idle enough, reset avgidle. - * (TV_DELTA returns 2000000 us when delta is large.) + * (TS_DELTA returns 2000000000 ns when delta is large.) */ cl->avgidle_ = cl->maxidle_; /* get pkt_time (for class) in usec */ #if 1 /* use approximation */ - pkt_time = (u_long)pktlen * cl->ps_per_byte_; - pkt_time = PSEC_TO_USEC(pkt_time); + pkt_time = pktlen * (long)cl->ps_per_byte_; + pkt_time = PSEC_TO_NSEC(pkt_time); #else pkt_time = pktlen * cl->ns_per_byte_ / 1000; #endif @@ -1354,7 +1354,7 @@ rmc_update_class_util(struct rm_ifdat *i /* set next idle to make avgidle 0 */ tidle = pkt_time + (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN); - TV_ADD_DELTA(nowp, tidle, &cl->undertime_); + TS_ADD_DELTA(nowp, tidle, &cl->undertime_); ++cl->stats_.over; } else { cl->avgidle_ = @@ -1391,7 +1391,7 @@ rmc_update_class_util(struct rm_ifdat *i cl = ifd->class_[ifd->qo_]; if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) { #if 1 /* ALTQ */ - if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) { + if ((qlen(cl->q_) <= 0) || TS_LT(nowp, &borrowed->undertime_)) { rmc_tl_satisfied(ifd, nowp); CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_); } else { @@ -1399,7 +1399,7 @@ rmc_update_class_util(struct rm_ifdat *i CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_); } #else /* !ALTQ */ - if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) { + if ((qlen(cl->q_) <= 1) || TS_LT(&now, &borrowed->undertime_)) { reset_cutoff(ifd); #ifdef notdef rmc_tl_satisfied(ifd, &now); @@ -1484,13 +1484,14 @@ tvhzto(struct timeval *tv) void rmc_delay_action(struct rm_class *cl, struct rm_class *borrow) { - int ndelay, t, extradelay; + int t; + long ndelay, extradelay; cl->stats_.overactions++; if (borrow != NULL) - TV_DELTA(&borrow->undertime_, &cl->overtime_, ndelay); + TS_DELTA(&borrow->undertime_, &cl->overtime_, ndelay); else - TV_DELTA(&cl->undertime_, &cl->overtime_, ndelay); + TS_DELTA(&cl->undertime_, &cl->overtime_, ndelay); #ifndef BORROW_OFFTIME ndelay += cl->offtime_; #endif @@ -1516,7 +1517,7 @@ rmc_delay_action(struct rm_class *cl, st extradelay -= cl->last_pkttime_; #endif if (extradelay > 0) { - TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_); + TS_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_); ndelay += extradelay; } @@ -1530,13 +1531,13 @@ rmc_delay_action(struct rm_class *cl, st * NOTE: If there's no other traffic, we need the timer as * a 'backstop' to restart this class. */ - if (ndelay > tick * 2) { + if (NSEC_TO_USEC(ndelay) > tick * 2) { #ifdef __FreeBSD__ /* FreeBSD rounds up the tick */ t = tvhzto(&cl->undertime_); #else /* other BSDs round down the tick */ - t = tvhzto(&cl->undertime_) + 1; + t = tshzto(&cl->undertime_) + 1; #endif } else t = 2; Index: src/sys/altq/altq_rmclass.h diff -u src/sys/altq/altq_rmclass.h:1.10 src/sys/altq/altq_rmclass.h:1.11 --- src/sys/altq/altq_rmclass.h:1.10 Wed Jul 21 06:33:30 2021 +++ src/sys/altq/altq_rmclass.h Wed Jul 21 06:41:22 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: altq_rmclass.h,v 1.10 2021/07/21 06:33:30 ozaki-r Exp $ */ +/* $NetBSD: altq_rmclass.h,v 1.11 2021/07/21 06:41:22 ozaki-r Exp $ */ /* $KAME: altq_rmclass.h,v 1.10 2003/08/20 23:30:23 itojun Exp $ */ /* @@ -55,31 +55,15 @@ typedef struct rm_class rm_class_t; struct red; -/* - * Macros for dealing with time values. We assume all times are - * 'timevals'. `microtime' is used to get the best available clock - * resolution. If `microtime' *doesn't* return a value that's about - * ten times smaller than the average packet time on the fastest - * link that will use these routines, a slightly different clock - * scheme than this one should be used. - * (Bias due to truncation error in this scheme will overestimate utilization - * and discriminate against high bandwidth classes. To remove this bias an - * integrator needs to be added. The simplest integrator uses a history of - * 10 * avg.packet.time / min.tick.time packet completion entries. This is - * straight forward to add but we don't want to pay the extra memory - * traffic to maintain it if it's not necessary (occasionally a vendor - * accidentally builds a workstation with a decent clock - e.g., Sun & HP).) - */ - -#define RM_GETTIME(now) microtime(&now) +#define RM_GETTIME(now) nanotime(&now) -#define TV_LT(a, b) (((a)->tv_sec < (b)->tv_sec) || \ - (((a)->tv_usec < (b)->tv_usec) && ((a)->tv_sec <= (b)->tv_sec))) +#define TS_LT(a, b) (((a)->tv_sec < (b)->tv_sec) || \ + (((a)->tv_nsec < (b)->tv_nsec) && ((a)->tv_sec <= (b)->tv_sec))) -#define TV_DELTA(a, b, delta) do { \ - register int xxs; \ +#define TS_DELTA(a, b, delta) do { \ + register int64_t xxs; \ \ - delta = (a)->tv_usec - (b)->tv_usec; \ + delta = (int64_t)((a)->tv_nsec - (b)->tv_nsec); \ if ((xxs = (a)->tv_sec - (b)->tv_sec)) { \ switch (xxs) { \ default: \ @@ -88,24 +72,24 @@ struct red; delta = 0; \ /* fall through */ \ case 2: \ - delta += 1000000; \ + delta += 1000000000; \ /* fall through */ \ case 1: \ - delta += 1000000; \ + delta += 1000000000; \ break; \ } \ } \ } while (0) -#define TV_ADD_DELTA(a, delta, res) do { \ - register int xxus = (a)->tv_usec + (delta); \ +#define TS_ADD_DELTA(a, delta, res) do { \ + register long xxns = (a)->tv_nsec + (long)(delta); \ \ (res)->tv_sec = (a)->tv_sec; \ - while (xxus >= 1000000) { \ + while (xxns >= 1000000000) { \ ++((res)->tv_sec); \ - xxus -= 1000000; \ + xxns -= 1000000000; \ } \ - (res)->tv_usec = xxus; \ + (res)->tv_nsec = xxns; \ } while (0) #define RM_TIMEOUT 2 /* 1 Clock tick. */ @@ -148,10 +132,10 @@ struct rm_class { u_int w_allotment_; /* Weighted allotment for WRR */ int bytes_alloc_; /* Allocation for round of WRR */ - int avgidle_; - int maxidle_; - int minidle_; - int offtime_; + long avgidle_; + long maxidle_; + long minidle_; + long offtime_; int sleeping_; /* != 0 if delaying */ int qthresh_; /* Queue threshold for formal link sharing */ int leaf_; /* Note whether leaf class or not.*/ @@ -170,10 +154,10 @@ struct rm_class { struct altq_pktattr *pktattr_; /* saved hdr used by RED/ECN */ int flags_; - int last_pkttime_; /* saved pkt_time */ - struct timeval undertime_; /* time can next send */ - struct timeval last_; /* time last packet sent */ - struct timeval overtime_; + long last_pkttime_; /* saved pkt_time */ + struct timespec undertime_; /* time can next send */ + struct timespec last_; /* time last packet sent */ + struct timespec overtime_; struct callout callout_; /* for timeout() calls */ rm_class_stats_t stats_; /* Class Statistics */ @@ -216,12 +200,12 @@ struct rm_ifdat { rm_class_t *borrowed_[RM_MAXQUEUED]; /* Class borrowed last */ rm_class_t *class_[RM_MAXQUEUED]; /* class sending */ int curlen_[RM_MAXQUEUED]; /* Current pktlen */ - struct timeval now_[RM_MAXQUEUED]; /* Current packet time. */ + struct timespec now_[RM_MAXQUEUED]; /* Current packet time. */ int is_overlimit_[RM_MAXQUEUED];/* Current packet time. */ int cutoff_; /* Cut-off depth for borrowing */ - struct timeval ifnow_; /* expected xmit completion time */ + struct timespec ifnow_; /* expected xmit completion time */ #if 1 /* ALTQ4PPP */ int maxiftime_; /* max delay inside interface */ #endif