--- include/net/sch_generic.h.orig	2020-08-21 15:13:51.787952710 +0800
+++ include/net/sch_generic.h	2020-08-24 22:01:46.718709912 +0800
@@ -79,6 +79,7 @@
 #define TCQ_F_INVISIBLE		0x80 /* invisible by default in dump */
 #define TCQ_F_NOLOCK		0x100 /* qdisc does not require locking */
 #define TCQ_F_OFFLOADED		0x200 /* qdisc is offloaded to HW */
+	int			pkt_seq;	/* enqueue counter, sampled by qdisc_run() */
 	u32			limit;
 	const struct Qdisc_ops	*ops;
 	struct qdisc_size_table	__rcu *stab;
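The new pkt_seq field is a per-qdisc enqueue counter. With TCQ_F_NOLOCK qdiscs such as pfifo_fast, a packet enqueued by one CPU in the window between another CPU's last dequeue and its qdisc_run_end() can be left with nobody scheduled to transmit it, because the enqueueing CPU loses the qdisc_run_begin() race. The counter lets the CPU that owns the run detect such an enqueue. The next two hunks add the producer and consumer sides of the check.
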
--- net/sched/sch_generic.c.orig	2020-08-24 22:02:04.589830751 +0800
+++ net/sched/sch_generic.c	2020-08-24 22:03:48.010728381 +0800
@@ -638,6 +638,8 @@
 	 * so we better not use qdisc_qstats_cpu_backlog_inc()
 	 */
 	this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len);
+
+	qdisc->pkt_seq++;	/* let a concurrent qdisc_run() notice this enqueue */
 	return NET_XMIT_SUCCESS;
 }
 
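This is the producer side: every packet accepted by the lockless pfifo_fast enqueue path bumps pkt_seq. Note that the increment is a plain int++ while NOLOCK enqueue may run concurrently on several CPUs, so the counter is a best-effort signal rather than an exact count; in principle a racing stale store could still let an enqueue go unnoticed. The consumer side below only cares whether the value changed between two samples.
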
--- include/net/pkt_sched.h.orig	2020-08-21 15:13:51.787952710 +0800
+++ include/net/pkt_sched.h	2020-08-24 22:06:58.856005213 +0800
@@ -116,9 +116,17 @@
 
 static inline void qdisc_run(struct Qdisc *q)
 {
-	if (qdisc_run_begin(q)) {
+	while (qdisc_run_begin(q)) {
+		int seq = q->pkt_seq;
 		__qdisc_run(q);
 		qdisc_run_end(q);
+
+		/* go another round if packets were enqueued
+		 * after we took the seqlock
+		 */
+		if (seq != q->pkt_seq)
+			continue;
+		break;
 	}
 }
 

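As an illustration of the pattern, below is a standalone userspace sketch using C11 atomics and pthreads. All names (run_begin, drain, producer, ...) are hypothetical and the locking is deliberately simplified: an atomic_flag stands in for the qdisc's RUNNING seqlock and an atomic counter for the backlog. It is a model of the idea, not kernel code. The property it demonstrates: a producer that loses the trylock is guaranteed that the current owner sees its pkt_seq bump and goes another round, so no packet is left stranded.

/* sketch.c -- sequence-counter retry, userspace model (hypothetical names) */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int pkt_seq;                     /* bumped on every enqueue   */
static atomic_int queued;                      /* stand-in for the backlog  */
static atomic_flag running = ATOMIC_FLAG_INIT; /* the RUNNING "seqlock"     */

static int run_begin(void) { return !atomic_flag_test_and_set(&running); }
static void run_end(void)  { atomic_flag_clear(&running); }

static void drain(void)                        /* models __qdisc_run()      */
{
	while (atomic_load(&queued) > 0)
		atomic_fetch_sub(&queued, 1);  /* "transmit" one packet     */
}

static void qdisc_run_sketch(void)
{
	while (run_begin()) {
		int seq = atomic_load(&pkt_seq);

		drain();
		run_end();

		/* go another round if packets were enqueued after we
		 * sampled pkt_seq; otherwise we are done
		 */
		if (seq != atomic_load(&pkt_seq))
			continue;
		break;
	}
}

static void *producer(void *arg)
{
	(void)arg;
	for (int i = 0; i < 100000; i++) {
		atomic_fetch_add(&queued, 1);  /* enqueue a packet          */
		atomic_fetch_add(&pkt_seq, 1); /* advertise the enqueue     */
		qdisc_run_sketch();            /* may lose the trylock race */
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, producer, NULL);
	pthread_create(&b, NULL, producer, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	/* with the seq check (and the break) this should always print 0 */
	printf("packets left in queue: %d\n", atomic_load(&queued));
	return 0;
}

Build with cc -pthread sketch.c. Dropping the seq comparison (reverting to a plain if around the critical section) can leave the final count nonzero, which is the userspace analogue of the stuck packet.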