[PATCH v2 iproute2-next] Add SKB Priority qdisc support in tc(8)

2018-08-13 Thread Nishanth Devarajan
sch_skbprio is a qdisc that prioritizes packets according to their skb->priority
field. Under congestion, it drops already-enqueued lower priority packets to
make space available for higher priority packets. Skbprio was conceived as a
solution for denial-of-service defenses that need to route packets with
different priorities as a means to overcome DoS attacks.

Signed-off-by: Nishanth Devarajan 
Reviewed-by: Michel Machado 
---
v2
*Patch applies cleanly, fixes for proper code indentation.
---
 man/man8/tc-skbprio.8 | 70 ++
 tc/Makefile   |  1 +
 tc/q_skbprio.c| 84 +++
 3 files changed, 155 insertions(+)
 create mode 100644 man/man8/tc-skbprio.8
 create mode 100644 tc/q_skbprio.c

diff --git a/man/man8/tc-skbprio.8 b/man/man8/tc-skbprio.8
new file mode 100644
index 000..844bbf4
--- /dev/null
+++ b/man/man8/tc-skbprio.8
@@ -0,0 +1,70 @@
+.TH SKBPRIO 8 "13 August 2018" "iproute2" "Linux"
+.SH NAME
+skbprio \- SKB Priority Queue
+
+.SH SYNOPSIS
+.B tc qdisc ... add skbprio
+.B [ limit
+packets
+.B ]
+
+.SH DESCRIPTION
+SKB Priority Queue is a queueing discipline intended to prioritize
+the most important packets during a denial-of-service (
+.B DoS
+) attack. The priority of a packet is given by
+.B skb->priority
+, where a higher value places the packet closer to the exit of the queue. When
+the queue is full, the lowest priority packet in the queue is dropped to make
+room for the packet to be added if it has higher priority. If the packet to be
+added has lower priority than all packets in the queue, it is dropped.
+
+Without SKB priority queue, queue length limits must be imposed
+on individual sub-queues, and there is no straightforward way to enforce
+a global queue length limit across all priorities. SKBprio queue enforces
+a global queue length limit while not restricting the lengths of
+individual sub-queues.
+
+While SKB Priority Queue is agnostic to how
+.B skb->priority
+is assigned, a typical use case is to copy
+the 6-bit DS field of IPv4 and IPv6 packets using
+.BR tc-skbedit (8)
+. If
+.B skb->priority
+is greater than or equal to 64, the priority is assumed to be 63.
+Priorities less than 64 are taken at face value.
+
+SKB Priority Queue enables routers to locally decide which
+packets to drop under a DoS attack.
+Priorities should be assigned to packets such that the higher the priority,
+the more expected behavior a source shows.
+So sources have an incentive to play by the rules.
+
+.SH ALGORITHM
+
+Skbprio maintains 64 lists (priorities go from 0 to 63).
+When a packet is enqueued, it gets inserted at the
+.B tail
+of its priority list. When a packet needs to be sent out to the network, it is
+taken from the head of the highest priority list. When the queue is full,
+the packet at the tail of the lowest priority list is dropped to serve the
+ingress packet - if it is of higher priority, otherwise the ingress packet is
+dropped. This algorithm allocates as much bandwidth as possible to high
+priority packets, while only servicing low priority packets when
+there is enough bandwidth.
+
+.SH PARAMETERS
+.TP
+limit
+Maximum queue size specified in packets. It defaults to 64.
+The range for this parameter is [0, UINT32_MAX].
+
+.SH SEE ALSO
+.BR tc-prio (8),
+.BR tc-skbedit (8)
+
+.SH AUTHORS
+Nishanth Devarajan , Michel Machado 
+
+This manpage maintained by Bert Hubert 
diff --git a/tc/Makefile b/tc/Makefile
index 36cde2f..5a1a7ff 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -12,6 +12,7 @@ TCMODULES += q_fifo.o
 TCMODULES += q_sfq.o
 TCMODULES += q_red.o
 TCMODULES += q_prio.o
+TCMODULES += q_skbprio.o
 TCMODULES += q_tbf.o
 TCMODULES += q_cbq.o
 TCMODULES += q_rr.o
diff --git a/tc/q_skbprio.c b/tc/q_skbprio.c
new file mode 100644
index 000..2b6d78f
--- /dev/null
+++ b/tc/q_skbprio.c
@@ -0,0 +1,84 @@
+/*
+ * q_skbprio.c SKB PRIORITY QUEUE.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors:Nishanth Devarajan, 
+ *
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+#include "tc_util.h"
+
+static void explain(void)
+{
+   fprintf(stderr, "Usage: ...  [ limit NUMBER ]\n");
+}
+
+static int skbprio_parse_opt(struct qdisc_util *qu, int argc, char **argv,
+struct nlmsghdr *n, const char *dev)
+{
+   int ok = 0;
+   struct tc_skbprio_qopt opt = {};
+
+   while (argc > 0) {
+   if (strcmp(*argv, "limit") == 0) {
+   NEXT_ARG();
+ 

[PATCH iproute2-next] Add SKB Priority qdisc support in tc(8)

2018-08-08 Thread Nishanth Devarajan
sch_skbprio is a qdisc that prioritizes packets according to their skb->priority
field. Under congestion, it drops already-enqueued lower priority packets to
make space available for higher priority packets. Skbprio was conceived as a
solution for denial-of-service defenses that need to route packets with
different priorities as a means to overcome DoS attacks.

Signed-off-by: Nishanth Devarajan 
Reviewed-by: Michel Machado 
---
 include/uapi/linux/pkt_sched.h |  7 
 man/man8/tc-skbprio.8  | 70 
 tc/Makefile|  1 +
 tc/q_skbprio.c | 81 ++
 4 files changed, 159 insertions(+)
 create mode 100644 man/man8/tc-skbprio.8
 create mode 100644 tc/q_skbprio.c

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 37b5096..81af99e 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -124,6 +124,12 @@ struct tc_fifo_qopt {
__u32   limit;  /* Queue length: bytes for bfifo, packets for pfifo */
 };
 
+/* SKBPRIO section */
+
+struct tc_skbprio_qopt {
+   __u32   limit;  /* Queue length in packets. */
+};
+
 /* PRIO section */
 
 #define TCQ_PRIO_BANDS 16
@@ -256,6 +262,7 @@ struct tc_red_qopt {
 #define TC_RED_ECN 1
 #define TC_RED_HARDDROP2
 #define TC_RED_ADAPTATIVE  4
+#define TC_RED_OFFLOADED   8
 };
 
 struct tc_red_xstats {
diff --git a/man/man8/tc-skbprio.8 b/man/man8/tc-skbprio.8
new file mode 100644
index 000..15a8524
--- /dev/null
+++ b/man/man8/tc-skbprio.8
@@ -0,0 +1,70 @@
+.TH SKBPRIO 8 "8 August 2018" "iproute2" "Linux"
+.SH NAME
+skbprio \- SKB Priority Queue
+
+.SH SYNOPSIS
+.B tc qdisc ... add skbprio
+.B [ limit
+packets
+.B ]
+
+.SH DESCRIPTION
+SKB Priority Queue is a queueing discipline intended to prioritize
+the most important packets during a denial-of-service (
+.B DoS
+) attack. The priority of a packet is given by
+.B skb->priority
+, where a higher value places the packet closer to the exit of the queue. When
+the queue is full, the lowest priority packet in the queue is dropped to make
+room for the packet to be added if it has higher priority. If the packet to be
+added has lower priority than all packets in the queue, it is dropped.
+
+Without SKB priority queue, queue length limits must be imposed
+on individual sub-queues, and there is no straightforward way to enforce
+a global queue length limit across all priorities. SKBprio queue enforces a
+global queue length limit while not restricting the lengths of individual
+sub-queues.
+
+While SKB Priority Queue is agnostic to how
+.B skb->priority
+is assigned, a typical use case is to copy
+the 6-bit DS field of IPv4 and IPv6 packets using
+.BR tc-skbedit (8)
+. If
+.B skb->priority
+is greater than or equal to 64, the priority is assumed to be 63.
+Priorities less than 64 are taken at face value.
+
+SKB Priority Queue enables routers to locally decide which
+packets to drop under a DoS attack.
+Priorities should be assigned to packets such that the higher the priority,
+the more expected behavior a source shows.
+So sources have an incentive to play by the rules.
+
+.SH ALGORITHM
+
+Skbprio maintains 64 lists (priorities go from 0 to 63).
+When a packet is enqueued, it gets inserted at the
+.B tail
+of its priority list. When a packet needs to be sent out to the network, it is
+taken from the head of the highest priority list. When the queue is full,
+the packet at the tail of the lowest priority list is dropped to serve the
+ingress packet - if it is of higher priority, otherwise the ingress packet is
+dropped. This algorithm allocates as much bandwidth as possible to high
+priority packets, while only servicing low priority packets when
+there is enough bandwidth.
+
+.SH PARAMETERS
+.TP
+limit
+Maximum queue size specified in packets. It defaults to 64.
+The range for this parameter is [0, UINT32_MAX].
+
+.SH SEE ALSO
+.BR tc-prio (8),
+.BR tc-skbedit (8)
+
+.SH AUTHORS
+Nishanth Devarajan , Michel Machado 
+
+This manpage maintained by Bert Hubert 
diff --git a/tc/Makefile b/tc/Makefile
index dfd0026..7646105 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -71,6 +71,7 @@ TCMODULES += q_clsact.o
 TCMODULES += e_bpf.o
 TCMODULES += f_matchall.o
 TCMODULES += q_cbs.o
+TCMODULES += q_skbprio.o
 
 TCSO :=
 ifeq ($(TC_CONFIG_ATM),y)
diff --git a/tc/q_skbprio.c b/tc/q_skbprio.c
new file mode 100644
index 000..a2a5077
--- /dev/null
+++ b/tc/q_skbprio.c
@@ -0,0 +1,81 @@
+/*
+ * q_skbprio.c SKB PRIORITY QUEUE.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors:Nishanth Devarajan, 

[PATCH iproute2-next] Add tc(8) userspace support for SKB Priority qdisc

2018-07-28 Thread Nishanth Devarajan
sch_skbprio is a qdisc that prioritizes packets according to their skb->priority
field. Under congestion, it drops already-enqueued lower priority packets to
make space available for higher priority packets. Skbprio was conceived as a
solution for denial-of-service defenses that need to route packets with
different priorities as a means to overcome DoS attacks.

Signed-off-by: Nishanth Devarajan 
Reviewed-by: Michel Machado 
---
 include/uapi/linux/pkt_sched.h |  7 
 man/man8/tc-skbprio.8  | 70 
 tc/Makefile|  1 +
 tc/q_skbprio.c | 81 ++
 4 files changed, 159 insertions(+)
 create mode 100644 man/man8/tc-skbprio.8
 create mode 100644 tc/q_skbprio.c

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 37b5096..81af99e 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -124,6 +124,12 @@ struct tc_fifo_qopt {
__u32   limit;  /* Queue length: bytes for bfifo, packets for pfifo */
 };
 
+/* SKBPRIO section */
+
+struct tc_skbprio_qopt {
+   __u32   limit;  /* Queue length in packets. */
+};
+
 /* PRIO section */
 
 #define TCQ_PRIO_BANDS 16
@@ -256,6 +262,7 @@ struct tc_red_qopt {
 #define TC_RED_ECN 1
 #define TC_RED_HARDDROP2
 #define TC_RED_ADAPTATIVE  4
+#define TC_RED_OFFLOADED   8
 };
 
 struct tc_red_xstats {
diff --git a/man/man8/tc-skbprio.8 b/man/man8/tc-skbprio.8
new file mode 100644
index 000..ae4f9e1
--- /dev/null
+++ b/man/man8/tc-skbprio.8
@@ -0,0 +1,70 @@
+.TH SKBPRIO 8 "27 July 2018" "iproute2" "Linux"
+.SH NAME
+skbprio \- SKB Priority Queue
+
+.SH SYNOPSIS
+.B tc qdisc ... add skbprio
+.B [ limit
+packets
+.B ]
+
+.SH DESCRIPTION
+SKB Priority Queue is a queueing discipline intended to prioritize
+the most important packets during a denial-of-service (
+.B DoS
+) attack. The priority of a packet is given by
+.B skb->priority
+, where a higher value places the packet closer to the exit of the queue. When
+the queue is full, the lowest priority packet in the queue is dropped to make
+room for the packet to be added if it has higher priority. If the packet to be
+added has lower priority than all packets in the queue, it is dropped.
+
+Without SKB priority queue, queue length limits must be imposed
+on individual sub-queues, and there is no straightforward way to enforce
+a global queue length limit across all priorities. SKBprio queue enforces a
+global queue length limit while not restricting the lengths of individual
+sub-queues.
+
+While SKB Priority Queue is agnostic to how
+.B skb->priority
+is assigned, a typical use case is to copy
+the 6-bit DS field of IPv4 and IPv6 packets using
+.BR tc-skbedit (8)
+. If
+.B skb->priority
+is greater than or equal to 64, the priority is assumed to be 63.
+Priorities less than 64 are taken at face value.
+
+SKB Priority Queue enables routers to locally decide which
+packets to drop under a DoS attack.
+Priorities should be assigned to packets such that the higher the priority,
+the more expected behavior a source shows.
+So sources have an incentive to play by the rules.
+
+.SH ALGORITHM
+
+Skbprio maintains 64 lists (priorities go from 0 to 63).
+When a packet is enqueued, it gets inserted at the
+.B tail
+of its priority list. When a packet needs to be sent out to the network, it is
+taken from the head of the highest priority list. When the queue is full,
+the packet at the tail of the lowest priority list is dropped to serve the
+ingress packet - if it is of higher priority, otherwise the ingress packet is
+dropped. This algorithm allocates as much bandwidth as possible to high
+priority packets, while only servicing low priority packets when
+there is enough bandwidth.
+
+.SH PARAMETERS
+.TP
+limit
+Maximum queue size specified in packets. It defaults to 64.
+The range for this parameter is [0, UINT32_MAX].
+
+.SH SEE ALSO
+.BR tc-prio (8),
+.BR tc-skbedit (8)
+
+.SH AUTHORS
+Nishanth Devarajan , Michel Machado 
+
+This manpage maintained by Bert Hubert 
diff --git a/tc/Makefile b/tc/Makefile
index dfd0026..7646105 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -71,6 +71,7 @@ TCMODULES += q_clsact.o
 TCMODULES += e_bpf.o
 TCMODULES += f_matchall.o
 TCMODULES += q_cbs.o
+TCMODULES += q_skbprio.o
 
 TCSO :=
 ifeq ($(TC_CONFIG_ATM),y)
diff --git a/tc/q_skbprio.c b/tc/q_skbprio.c
new file mode 100644
index 000..a2a5077
--- /dev/null
+++ b/tc/q_skbprio.c
@@ -0,0 +1,81 @@
+/*
+ * q_skbprio.c SKB PRIORITY QUEUE.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors:Nishanth Devarajan, 

[PATCH v5 net-next] net/sched: add skbprio scheduler

2018-07-23 Thread Nishanth Devarajan
net/sched: add skbprio scheduler

Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes packets
according to their skb->priority field. Under congestion, already-enqueued lower
priority packets will be dropped to make space available for higher priority
packets. Skbprio was conceived as a solution for denial-of-service defenses that
need to route packets with different priorities as a means to overcome DoS
attacks.

v5
*Do not reference qdisc_dev(sch)->tx_queue_len for setting limit. Instead set
default sch->limit to 64.

v4
*Drop Documentation/networking/sch_skbprio.txt doc file to move it to tc man
page for Skbprio, in iproute2.

v3
*Drop max_limit parameter in struct skbprio_sched_data and instead use
sch->limit.

*Reference qdisc_dev(sch)->tx_queue_len only once, during initialisation for
qdisc (previously being referenced every time qdisc changes).

*Move qdisc's detailed description from in-code to Documentation/networking.

*When qdisc is saturated, enqueue incoming packet first before dequeueing
lowest priority packet in queue - improves usage of call stack registers.

*Introduce and use overlimit stat to keep track of number of dropped packets.

v2
*Use skb->priority field rather than DS field. Rename queueing discipline as
SKB Priority Queue (previously Gatekeeper Priority Queue).

*Queueing discipline is made classful to expose Skbprio's internal priority
queues.

Signed-off-by: Nishanth Devarajan 
Reviewed-by: Sachin Paryani 
Reviewed-by: Cody Doucette 
Reviewed-by: Michel Machado 
---
 include/uapi/linux/pkt_sched.h |  15 ++
 net/sched/Kconfig  |  13 ++
 net/sched/Makefile |   1 +
 net/sched/sch_skbprio.c| 320 +
 4 files changed, 349 insertions(+)
 create mode 100644 net/sched/sch_skbprio.c

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index d9cc9dc..8975fd1 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -124,6 +124,21 @@ struct tc_fifo_qopt {
__u32   limit;  /* Queue length: bytes for bfifo, packets for pfifo */
 };
 
+/* SKBPRIO section */
+
+/*
+ * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
+ * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able
+ * to map one to one the DS field of IPV4 and IPV6 headers.
+ * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
+ */
+
+#define SKBPRIO_MAX_PRIORITY 64
+
+struct tc_skbprio_qopt {
+   __u32   limit;  /* Queue length in packets. */
+};
+
 /* PRIO section */
 
 #define TCQ_PRIO_BANDS 16
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 7af2467..7699344 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -251,6 +251,19 @@ config NET_SCH_MQPRIO
 
  If unsure, say N.
 
+config NET_SCH_SKBPRIO
+   tristate "SKB priority queue scheduler (SKBPRIO)"
+   help
+ Say Y here if you want to use the SKB priority queue
+ scheduler. This schedules packets according to skb->priority,
+ which is useful for request packets in DoS mitigation systems such
+ as Gatekeeper.
+
+ To compile this driver as a module, choose M here: the module will
+ be called sch_skbprio.
+
+ If unsure, say N.
+
 config NET_SCH_CHOKE
tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 673ee7d..112ef70 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_NET_SCH_NETEM)   += sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)  += sch_drr.o
 obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
 obj-$(CONFIG_NET_SCH_MQPRIO)   += sch_mqprio.o
+obj-$(CONFIG_NET_SCH_SKBPRIO)  += sch_skbprio.o
 obj-$(CONFIG_NET_SCH_CHOKE)+= sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)  += sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)+= sch_codel.o
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
new file mode 100644
index 000..52c0b6d
--- /dev/null
+++ b/net/sched/sch_skbprio.c
@@ -0,0 +1,320 @@
+/*
+ * net/sched/sch_skbprio.c  SKB Priority Queue.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors:Nishanth Devarajan, 
+ * Cody Doucette, 
+ * original idea by Michel Machado, Cody Doucette, and Qiaobin Fu
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* SKB Priority Queue
+ * =
+ *
+ * Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes
+ * packets according to their skb->priority field. Under congestion,
+ * S

[PATCH v4 net-next] net/sched: add skbprio scheduler

2018-07-19 Thread Nishanth Devarajan
net/sched: add skbprio scheduler

Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes packets
according to their skb->priority field. Under congestion, already-enqueued lower
priority packets will be dropped to make space available for higher priority
packets. Skbprio was conceived as a solution for denial-of-service defenses that
need to route packets with different priorities as a means to overcome DoS
attacks.

v4
*Drop Documentation/networking/sch_skbprio.txt doc file to move it to tc man
page for Skbprio, in iproute2.

v3
*Drop max_limit parameter in struct skbprio_sched_data and instead use
sch->limit.

*Reference qdisc_dev(sch)->tx_queue_len only once, during initialisation for
qdisc (previously being referenced every time qdisc changes).

*Move qdisc's detailed description from in-code to Documentation/networking.

*When qdisc is saturated, enqueue incoming packet first before dequeueing
lowest priority packet in queue - improves usage of call stack registers.

*Introduce and use overlimit stat to keep track of number of dropped packets.

v2
*Use skb->priority field rather than DS field. Rename queueing discipline as
SKB Priority Queue (previously Gatekeeper Priority Queue).

*Queueing discipline is made classful to expose Skbprio's internal priority
queues.

Signed-off-by: Nishanth Devarajan 
Reviewed-by: Sachin Paryani 
Reviewed-by: Cody Doucette 
Reviewed-by: Michel Machado 
---
 include/uapi/linux/pkt_sched.h |  15 ++
 net/sched/Kconfig  |  13 ++
 net/sched/Makefile |   1 +
 net/sched/sch_skbprio.c| 330 +
 4 files changed, 359 insertions(+)
 create mode 100644 net/sched/sch_skbprio.c

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index d9cc9dc..8975fd1 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -124,6 +124,21 @@ struct tc_fifo_qopt {
__u32   limit;  /* Queue length: bytes for bfifo, packets for pfifo */
 };
 
+/* SKBPRIO section */
+
+/*
+ * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
+ * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able
+ * to map one to one the DS field of IPV4 and IPV6 headers.
+ * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
+ */
+
+#define SKBPRIO_MAX_PRIORITY 64
+
+struct tc_skbprio_qopt {
+   __u32   limit;  /* Queue length in packets. */
+};
+
 /* PRIO section */
 
 #define TCQ_PRIO_BANDS 16
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 7af2467..7699344 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -251,6 +251,19 @@ config NET_SCH_MQPRIO
 
  If unsure, say N.
 
+config NET_SCH_SKBPRIO
+   tristate "SKB priority queue scheduler (SKBPRIO)"
+   help
+ Say Y here if you want to use the SKB priority queue
+ scheduler. This schedules packets according to skb->priority,
+ which is useful for request packets in DoS mitigation systems such
+ as Gatekeeper.
+
+ To compile this driver as a module, choose M here: the module will
+ be called sch_skbprio.
+
+ If unsure, say N.
+
 config NET_SCH_CHOKE
tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 673ee7d..112ef70 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_NET_SCH_NETEM)   += sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)  += sch_drr.o
 obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
 obj-$(CONFIG_NET_SCH_MQPRIO)   += sch_mqprio.o
+obj-$(CONFIG_NET_SCH_SKBPRIO)  += sch_skbprio.o
 obj-$(CONFIG_NET_SCH_CHOKE)+= sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)  += sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)+= sch_codel.o
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
new file mode 100644
index 000..6b94f54
--- /dev/null
+++ b/net/sched/sch_skbprio.c
@@ -0,0 +1,330 @@
+/*
+ * net/sched/sch_skbprio.c  SKB Priority Queue.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors:Nishanth Devarajan, 
+ * Cody Doucette, 
+ * original idea by Michel Machado, Cody Doucette, and Qiaobin Fu
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* SKB Priority Queue
+ * =
+ *
+ * Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes
+ * packets according to their skb->priority field. Under congestion,
+ * Skbprio drops already-enqueued lower priority packets to make space
+ * available for higher priority packets; it was 

[PATCH v3 net-next] net/sched: add skbprio scheduler

2018-07-07 Thread Nishanth Devarajan
net/sched: add skbprio scheduler

Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes packets
according to their skb->priority field. Under congestion, already-enqueued lower
priority packets will be dropped to make space available for higher priority
packets. Skbprio was conceived as a solution for denial-of-service defenses that
need to route packets with different priorities as a means to overcome DoS
attacks.

v3
*Drop max_limit parameter in struct skbprio_sched_data and instead use
sch->limit.

*Reference qdisc_dev(sch)->tx_queue_len only once, during initialisation for
qdisc (previously being referenced every time qdisc changes).

*Move qdisc's detailed description from in-code to Documentation/networking.

*When qdisc is saturated, enqueue incoming packet first before dequeueing
lowest priority packet in queue - improves usage of call stack registers.

*Introduce and use overlimit stat to keep track of number of dropped packets.

v2
*Use skb->priority field rather than DS field. Rename queueing discipline as
SKB Priority Queue (previously Gatekeeper Priority Queue).

*Queueing discipline is made classful to expose Skbprio's internal priority
queues.

Signed-off-by: Nishanth Devarajan 
Reviewed-by: Sachin Paryani 
Reviewed-by: Cody Doucette 
Reviewed-by: Michel Machado 
---
 Documentation/networking/sch_skbprio.txt |  24 +++
 include/uapi/linux/pkt_sched.h   |  15 ++
 net/sched/Kconfig|  13 ++
 net/sched/Makefile   |   1 +
 net/sched/sch_skbprio.c  | 330 +++
 5 files changed, 383 insertions(+)
 create mode 100644 Documentation/networking/sch_skbprio.txt
 create mode 100644 net/sched/sch_skbprio.c

diff --git a/Documentation/networking/sch_skbprio.txt 
b/Documentation/networking/sch_skbprio.txt
new file mode 100644
index 000..3aa4d3e
--- /dev/null
+++ b/Documentation/networking/sch_skbprio.txt
@@ -0,0 +1,24 @@
+SKB Priority Queue
+==================
+
+This qdisc schedules a packet according to skb->priority, where a higher
+value places the packet closer to the exit of the queue. When the queue is
+full, the lowest priority packet in the queue is dropped to make room for
+the packet to be added if it has higher priority. If the packet to be added
+has lower priority than all packets in the queue, it is dropped.
+
+Without the SKB priority queue, queue length limits must be imposed
+for individual queues, and there is no easy way to enforce a global queue
+length limit across all priorities. With the SKBprio queue, a global
+queue length limit can be enforced while not restricting the queue lengths
+of individual priorities.
+
+This is especially useful for a denial-of-service defense system like
+Gatekeeper, which prioritizes packets in flows that demonstrate expected
+behavior of legitimate users. The queue is flexible to allow any number
+of packets of any priority up to the global limit of the scheduler
+without risking resource overconsumption by a flood of low priority packets.
+
+The Gatekeeper codebase is found here:
+
+   https://github.com/AltraMayor/gatekeeper
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 9491184..5c6429d 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -124,6 +124,21 @@ struct tc_fifo_qopt {
__u32   limit;  /* Queue length: bytes for bfifo, packets for pfifo */
 };
 
+/* SKBPRIO section */
+
+/*
+ * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
+ * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able
+ * to map one to one the DS field of IPV4 and IPV6 headers.
+ * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
+ */
+
+#define SKBPRIO_MAX_PRIORITY 64
+
+struct tc_skbprio_qopt {
+   __u32   limit;  /* Queue length in packets. */
+};
+
 /* PRIO section */
 
 #define TCQ_PRIO_BANDS 16
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index fcc8970..4aa6eb0 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -251,6 +251,19 @@ config NET_SCH_MQPRIO
 
  If unsure, say N.
 
+config NET_SCH_SKBPRIO
+   tristate "SKB priority queue scheduler (SKBPRIO)"
+   help
+ Say Y here if you want to use the SKB priority queue
+ scheduler. This schedules packets according to skb->priority,
+ which is useful for request packets in DoS mitigation systems such
+ as Gatekeeper.
+
+ To compile this driver as a module, choose M here: the module will
+ be called sch_skbprio.
+
+ If unsure, say N.
+
 config NET_SCH_CHOKE
tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 9a5a707..ad5cd1e 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_NET_SCH_NETEM)   += sch_netem.o
 obj-$(CONF

Re: [PATCH v2 net-next] net/sched: add skbprio scheduler

2018-06-25 Thread Nishanth Devarajan
On Sun, Jun 24, 2018 at 11:43:07AM -0400, Jamal Hadi Salim wrote:
> On 23/06/18 04:47 PM, Nishanth Devarajan wrote:
> [..]
> 
> >+/* Drop the packet at the tail of the lowest priority qdisc. */
> >+lp_qdisc = &q->qdiscs[lp];
> >+to_drop = __skb_dequeue_tail(lp_qdisc);
> >+BUG_ON(!to_drop);
> >+qdisc_qstats_backlog_dec(sch, to_drop);
> >+qdisc_drop(to_drop, sch, to_free);
> >+
> 
> Maybe also increase overlimit stat here? It will keep track
> of low prio things dropped because you were congested.
> Such a stat helps when debugging or collecting analytics.
> 
> Per Alex's comment, how about:
> 
> ---
> Skbprio (SKB Priority Queue) is a queueing discipline that
> prioritizes packets according to their skb->priority field.
> Under congestion, already-enqueued lower priority packets
> will be dropped to make space available for higher priority
> packets. Skbprio was conceived as a solution for
> denial-of-service defenses that need to route packets with
> different priorities as a means to overcome DoS attacks
> as described in paper ...
> 
> 
> cheers,
> jamal

Sounds good, will make some changes in v3.

Thanks,
Nishanth


Re: [PATCH v2 net-next] net/sched: add skbprio scheduler

2018-06-25 Thread Nishanth Devarajan
On Sat, Jun 23, 2018 at 03:10:32PM -0700, Alexander Duyck wrote:
> On Sat, Jun 23, 2018 at 1:47 PM, Nishanth Devarajan  
> wrote:
> > net/sched: add skbprio scheduler
> >
> > Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes 
> > packets
> > according to their skb->priority field. Although Skbprio can be employed in 
> > any
> > scenario in which a higher skb->priority field means a higher priority 
> > packet,
> > Skbprio was concieved as a solution for denial-of-service defenses that 
> > need to
> > route packets with different priorities.
> 
> Really this description is not very good. Reading it I was thinking to
> myself "why do we need this, prio already does this". It wasn't until
> I read through the code that I figured out that you are basically
> adding dropping of lower priority frames.
> 

OK, I'll take Jamal's suggestion on this and write up a new description.

> >
> > v2
> > *Use skb->priority field rather than DS field. Rename queueing discipline as
> > SKB Priority Queue (previously Gatekeeper Priority Queue).
> >
> > *Queueing discipline is made classful to expose Skbprio's internal priority
> > queues.
> >
> > Signed-off-by: Nishanth Devarajan 
> > Reviewed-by: Sachin Paryani 
> > Reviewed-by: Cody Doucette 
> > Reviewed-by: Michel Machado 
> > ---
> >  include/uapi/linux/pkt_sched.h |  15 ++
> >  net/sched/Kconfig  |  13 ++
> >  net/sched/Makefile |   1 +
> >  net/sched/sch_skbprio.c| 347 
> > +
> >  4 files changed, 376 insertions(+)
> >  create mode 100644 net/sched/sch_skbprio.c
> >
> > diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
> > index 37b5096..6fd07e8 100644
> > --- a/include/uapi/linux/pkt_sched.h
> > +++ b/include/uapi/linux/pkt_sched.h
> > @@ -124,6 +124,21 @@ struct tc_fifo_qopt {
> > __u32   limit;  /* Queue length: bytes for bfifo, packets for pfifo 
> > */
> >  };
> >
> > +/* SKBPRIO section */
> > +
> > +/*
> > + * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
> > + * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be 
> > able
> > + * to map one to one the DS field of IPV4 and IPV6 headers.
> > + * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
> > + */
> > +
> > +#define SKBPRIO_MAX_PRIORITY 64
> > +
> > +struct tc_skbprio_qopt {
> > +   __u32   limit;  /* Queue length in packets. */
> > +};
> > +
> >  /* PRIO section */
> >
> >  #define TCQ_PRIO_BANDS 16
> > diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> > index a01169f..9ac4b53 100644
> > --- a/net/sched/Kconfig
> > +++ b/net/sched/Kconfig
> > @@ -240,6 +240,19 @@ config NET_SCH_MQPRIO
> >
> >   If unsure, say N.
> >
> > +config NET_SCH_SKBPRIO
> > +   tristate "SKB priority queue scheduler (SKBPRIO)"
> > +   help
> > + Say Y here if you want to use the SKB priority queue
> > + scheduler. This schedules packets according to skb->priority,
> > + which is useful for request packets in DoS mitigation systems such
> > + as Gatekeeper.
> > +
> > + To compile this driver as a module, choose M here: the module will
> > + be called sch_skbprio.
> > +
> > + If unsure, say N.
> > +
> >  config NET_SCH_CHOKE
> > tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
> > help
> > diff --git a/net/sched/Makefile b/net/sched/Makefile
> > index 8811d38..a4d8893 100644
> > --- a/net/sched/Makefile
> > +++ b/net/sched/Makefile
> > @@ -46,6 +46,7 @@ obj-$(CONFIG_NET_SCH_NETEM)   += sch_netem.o
> >  obj-$(CONFIG_NET_SCH_DRR)  += sch_drr.o
> >  obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
> >  obj-$(CONFIG_NET_SCH_MQPRIO)   += sch_mqprio.o
> > +obj-$(CONFIG_NET_SCH_SKBPRIO)  += sch_skbprio.o
> >  obj-$(CONFIG_NET_SCH_CHOKE)+= sch_choke.o
> >  obj-$(CONFIG_NET_SCH_QFQ)  += sch_qfq.o
> >  obj-$(CONFIG_NET_SCH_CODEL)    += sch_codel.o
> > diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
> > new file mode 100644
> > index 000..5e89446
> > --- /dev/null
> > +++ b/net/sched/sch_skbprio.c
> > @@ -0,0 +1,347 @@
> > +/*
> > + * net/sched/sch_skbprio.c  SKB Priority Queue.
> > + *
> > +

Re: [PATCH v2 net-next] net/sched: add skbprio scheduler

2018-06-25 Thread Nishanth Devarajan
On Sat, Jun 23, 2018 at 02:43:16PM -0700, Cong Wang wrote:
> On Sat, Jun 23, 2018 at 1:47 PM, Nishanth Devarajan  
> wrote:
> > diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
> > index 37b5096..6fd07e8 100644
> > --- a/include/uapi/linux/pkt_sched.h
> > +++ b/include/uapi/linux/pkt_sched.h
> ...
> > +#define SKBPRIO_MAX_PRIORITY 64
> > +
> > +struct tc_skbprio_qopt {
> > +   __u32   limit;  /* Queue length in packets. */
> > +};
> 
> 
> Since this is just an integer, you can just make it NLA_U32 instead
> of a struct?
> 
>

Making it NLA_U32, wouldn't that require adding an nla_policy struct in the
code? I also feel uneasy that we'd be straying from the convention of having a
tc qopt struct to pass in essential parameters from userspace.

> > +static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
> > +   struct netlink_ext_ack *extack)
> > +{
> > +   struct skbprio_sched_data *q = qdisc_priv(sch);
> > +   struct tc_skbprio_qopt *ctl = nla_data(opt);
> > +   const unsigned int min_limit = 1;
> > +
> > +   if (ctl->limit == (typeof(ctl->limit))-1)
> > +   q->max_limit = max(qdisc_dev(sch)->tx_queue_len, min_limit);
> > +   else if (ctl->limit < min_limit ||
> > +   ctl->limit > qdisc_dev(sch)->tx_queue_len)
> > +   return -EINVAL;
> > +   else
> > +   q->max_limit = ctl->limit;
> > +
> > +   return 0;
> > +}
> 
> Isn't q->max_limit same with sch->limit?
>

q->max_limit was intended to represent the maximum limit that Skbprio could
accommodate, i.e. the tx queue len of the device attached to the qdisc, to check
the limit parameter passed from userspace. I'll correct this in v3.
 
> Also, please avoid dev->tx_queue_len here, it may change
> independently of your qdisc change, unless you want to implement
> ops->change_tx_queue_len().

OK, will make this change.


[PATCH v2 net-next] net/sched: add skbprio scheduler

2018-06-23 Thread Nishanth Devarajan
net/sched: add skbprio scheduler

Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes packets
according to their skb->priority field. Although Skbprio can be employed in any
scenario in which a higher skb->priority field means a higher priority packet,
Skbprio was conceived as a solution for denial-of-service defenses that need to
route packets with different priorities.

v2
*Use skb->priority field rather than DS field. Rename queueing discipline as
SKB Priority Queue (previously Gatekeeper Priority Queue).

*Queueing discipline is made classful to expose Skbprio's internal priority
queues.

Signed-off-by: Nishanth Devarajan 
Reviewed-by: Sachin Paryani 
Reviewed-by: Cody Doucette 
Reviewed-by: Michel Machado 
---
 include/uapi/linux/pkt_sched.h |  15 ++
 net/sched/Kconfig  |  13 ++
 net/sched/Makefile |   1 +
 net/sched/sch_skbprio.c| 347 +
 4 files changed, 376 insertions(+)
 create mode 100644 net/sched/sch_skbprio.c

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 37b5096..6fd07e8 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -124,6 +124,21 @@ struct tc_fifo_qopt {
__u32   limit;  /* Queue length: bytes for bfifo, packets for pfifo */
 };
 
+/* SKBPRIO section */
+
+/*
+ * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
+ * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able
+ * to map one to one the DS field of IPV4 and IPV6 headers.
+ * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
+ */
+
+#define SKBPRIO_MAX_PRIORITY 64
+
+struct tc_skbprio_qopt {
+   __u32   limit;  /* Queue length in packets. */
+};
+
 /* PRIO section */
 
 #define TCQ_PRIO_BANDS 16
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index a01169f..9ac4b53 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -240,6 +240,19 @@ config NET_SCH_MQPRIO
 
  If unsure, say N.
 
+config NET_SCH_SKBPRIO
+   tristate "SKB priority queue scheduler (SKBPRIO)"
+   help
+ Say Y here if you want to use the SKB priority queue
+ scheduler. This schedules packets according to skb->priority,
+ which is useful for request packets in DoS mitigation systems such
+ as Gatekeeper.
+
+ To compile this driver as a module, choose M here: the module will
+ be called sch_skbprio.
+
+ If unsure, say N.
+
 config NET_SCH_CHOKE
tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8811d38..a4d8893 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_NET_SCH_NETEM)   += sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)  += sch_drr.o
 obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
 obj-$(CONFIG_NET_SCH_MQPRIO)   += sch_mqprio.o
+obj-$(CONFIG_NET_SCH_SKBPRIO)  += sch_skbprio.o
 obj-$(CONFIG_NET_SCH_CHOKE)+= sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)  += sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)+= sch_codel.o
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
new file mode 100644
index 000..5e89446
--- /dev/null
+++ b/net/sched/sch_skbprio.c
@@ -0,0 +1,347 @@
+/*
+ * net/sched/sch_skbprio.c  SKB Priority Queue.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors:Nishanth Devarajan, 
+ * Cody Doucette, 
+ * original idea by Michel Machado, Cody Doucette, and Qiaobin Fu
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+
+/*   SKB Priority Queue
+ * =
+ *
+ * This qdisc schedules a packet according to skb->priority, where a higher
+ * value places the packet closer to the exit of the queue. When the queue is
+ * full, the lowest priority packet in the queue is dropped to make room for
+ * the packet to be added if it has higher priority. If the packet to be added
+ * has lower priority than all packets in the queue, it is dropped.
+ *
+ * Without the SKB priority queue, queue length limits must be imposed
+ * for individual queues, and there is no easy way to enforce a global queue
+ * length limit across all priorities. With the SKBprio queue, a global
+ * queue length limit can be enforced while not restricting the queue lengths
+ * of individual priorities.
+ *
+ * This is especially useful for a denial-of-service defense system like
+ * Gatekeeper, which prioritizes packets in flows that demonstrate expected
+ * behavior of legitimate users. The queue is flexible to allow any number
+ * of packe

[PATCH v2 net-next] net/sched: add skbprio scheduler

2018-06-06 Thread Nishanth Devarajan
net/sched: add skbprio scheduler

Skbprio (SKB Priority Queue) is a queuing discipline that prioritizes IPv4
and IPv6 packets according to their skb->priority field. Although Skbprio can
be employed in any scenario in which a higher skb->priority field means a
higher priority packet, Skbprio was conceived as a solution for
denial-of-service defenses that need to route packets with different priorities.

v2:
*Use skb->priority field rather than DS field. Rename queueing discipline as
SKB Priority Queue (previously Gatekeeper Priority Queue).

*Queueing discipline is made classful to expose Skbprio's internal priority
queues.

Signed-off-by: Nishanth Devarajan 
Reviewed-by: Sachin Paryani 
Reviewed-by: Cody Doucette 
Reviewed-by: Michel Machado 
---
 include/uapi/linux/pkt_sched.h |  15 ++
 net/sched/Kconfig  |  13 ++
 net/sched/Makefile |   1 +
 net/sched/sch_skbprio.c| 347 +
 4 files changed, 376 insertions(+)
 create mode 100644 net/sched/sch_skbprio.c

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 37b5096..6fd07e8 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -124,6 +124,21 @@ struct tc_fifo_qopt {
__u32   limit;  /* Queue length: bytes for bfifo, packets for pfifo */
 };
 
+/* SKBPRIO section */
+
+/*
+ * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
+ * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able
+ * to map one to one the DS field of IPV4 and IPV6 headers.
+ * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
+ */
+
+#define SKBPRIO_MAX_PRIORITY 64
+
+struct tc_skbprio_qopt {
+   __u32   limit;  /* Queue length in packets. */
+};
+
 /* PRIO section */
 
 #define TCQ_PRIO_BANDS 16
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index a01169f..9ac4b53 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -240,6 +240,19 @@ config NET_SCH_MQPRIO
 
  If unsure, say N.
 
+config NET_SCH_SKBPRIO
+   tristate "SKB priority queue scheduler (SKBPRIO)"
+   help
+ Say Y here if you want to use the SKB priority queue
+ scheduler. This schedules packets according to skb->priority,
+ which is useful for request packets in DoS mitigation systems such
+ as Gatekeeper.
+
+ To compile this driver as a module, choose M here: the module will
+ be called sch_skbprio.
+
+ If unsure, say N.
+
 config NET_SCH_CHOKE
tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8811d38..a4d8893 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_NET_SCH_NETEM)   += sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)  += sch_drr.o
 obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
 obj-$(CONFIG_NET_SCH_MQPRIO)   += sch_mqprio.o
+obj-$(CONFIG_NET_SCH_SKBPRIO)  += sch_skbprio.o
 obj-$(CONFIG_NET_SCH_CHOKE)+= sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)  += sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)+= sch_codel.o
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
new file mode 100644
index 000..f73ad62
--- /dev/null
+++ b/net/sched/sch_skbprio.c
@@ -0,0 +1,347 @@
+/*
+ * net/sched/sch_skbprio.c  SKB Priority Queue.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors:Nishanth Devarajan, 
+ * Cody Doucette, 
+ * original idea by Michel Machado, Cody Doucette, and Qiaobin Fu
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+
+/*   SKB Priority Queue
+ * =
+ *
+ * This qdisc schedules a packet according to skb->priority, where a higher
+ * value places the packet closer to the exit of the queue. When the queue is
+ * full, the lowest priority packet in the queue is dropped to make room for
+ * the packet to be added if it has higher priority. If the packet to be added
+ * has lower priority than all packets in the queue, it is dropped.
+ *
+ * Without the SKB priority queue, queue length limits must be imposed
+ * for individual queues, and there is no easy way to enforce a global queue
+ * length limit across all priorities. With the SKBprio queue, a global
+ * queue length limit can be enforced while not restricting the queue lengths
+ * of individual priorities.
+ *
+ * This is especially useful for a denial-of-service defense system like
+ * Gatekeeper, which prioritizes packets in flows that demonstrate expected
+ * behavior of legitimate users. The queue is flexible to allow any numbe

Re: [PATCH net-next] net:sched: add gkprio scheduler

2018-05-08 Thread Nishanth Devarajan
On Mon, May 07, 2018 at 10:24:51PM -0700, Cong Wang wrote:
> On Mon, May 7, 2018 at 2:36 AM, Nishanth Devarajan <ndev2...@gmail.com> wrote:
> > net/sched: add gkprio scheduler
> >
> > Gkprio (Gatekeeper Priority Queue) is a queueing discipline that prioritizes
> > IPv4 and IPv6 packets accordingly to their DSCP field. Although Gkprio can 
> > be
> > employed in any QoS scenario in which a higher DSCP field means a higher
> > priority packet, Gkprio was conceived as a solution for denial-of-service
> > defenses that need to route packets with different priorities.
> 
> 
> Can we give it a better name? "Gatekeeper" is meaningless if we read
> it alone, it ties to your Gatekeeper project which is more than just this
> kernel module. Maybe "DS Priority Queue"?
> 

Yes, we should be able to come up with a better name, we'll work on it.

> Overall it looks good to me, just one thing below:
> 
> > +struct Qdisc_ops gkprio_qdisc_ops __read_mostly = {
> > +   .id =   "gkprio",
> > +   .priv_size  =   sizeof(struct gkprio_sched_data),
> > +   .enqueue=   gkprio_enqueue,
> > +   .dequeue=   gkprio_dequeue,
> > +   .peek   =   qdisc_peek_dequeued,
> > +   .init   =   gkprio_init,
> > +   .reset  =   gkprio_reset,
> > +   .change =   gkprio_change,
> > +   .dump   =   gkprio_dump,
> > +   .destroy=   gkprio_destroy,
> > +   .owner  =   THIS_MODULE,
> > +};
> 
> You probably want to add Qdisc_class_ops here so that you can
> dump the stats of each internal queue.

Alright, will make some changes and send in a v2.

Thanks,
Nishanth


[PATCH net-next] net:sched: add gkprio scheduler

2018-05-07 Thread Nishanth Devarajan
net/sched: add gkprio scheduler

Gkprio (Gatekeeper Priority Queue) is a queueing discipline that prioritizes
IPv4 and IPv6 packets according to their DSCP field. Although Gkprio can be
employed in any QoS scenario in which a higher DSCP field means a higher
priority packet, Gkprio was conceived as a solution for denial-of-service
defenses that need to route packets with different priorities.

Signed-off-by: Nishanth Devarajan <ndev2...@gmail.com>
Reviewed-by: Cody Doucette <douce...@bu.edu>
Reviewed-by: Michel Machado <mic...@digirati.com.br>
Reviewed-by: Sachin Paryani <sachin.pary...@gmail.com>
---
 include/uapi/linux/pkt_sched.h |  11 ++
 net/sched/Kconfig  |  13 ++
 net/sched/Makefile |   1 +
 net/sched/sch_gkprio.c | 316 +
 4 files changed, 341 insertions(+)
 create mode 100644 net/sched/sch_gkprio.c

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 37b5096..de8b5ca 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -124,6 +124,17 @@ struct tc_fifo_qopt {
__u32   limit;  /* Queue length: bytes for bfifo, packets for pfifo */
 };
 
+/* GKPRIO section */
+
+struct tc_gkprio_qopt {
+   __u32   limit;  /* Queue length in packets. */
+   __u16   noip_dfltp; /* Default priority for non-IP packets. */
+
+   /* Stats. */
+   __u16 highest_prio; /* Highest priority currently in queue.  */
+   __u16 lowest_prio;  /* Lowest priority currently in queue. */
+};
+
 /* PRIO section */
 
 #define TCQ_PRIO_BANDS 16
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index a01169f..9c47857 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -240,6 +240,19 @@ config NET_SCH_MQPRIO
 
  If unsure, say N.
 
+config NET_SCH_GKPRIO
+   tristate "Gatekeeper priority queue scheduler (GKPRIO)"
+   help
+ Say Y here if you want to use the Gatekeeper priority queue
+ scheduler. This schedules packets according to priorities based on
+ the DSCP (IPv4) and DS (IPv6) fields, which is useful for request
+ packets in DoS mitigation systems such as Gatekeeper.
+
+ To compile this driver as a module, choose M here: the module will
+ be called sch_gkprio.
+
+ If unsure, say N.
+
 config NET_SCH_CHOKE
tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8811d38..93a1fdb 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_NET_SCH_NETEM)   += sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)  += sch_drr.o
 obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
 obj-$(CONFIG_NET_SCH_MQPRIO)   += sch_mqprio.o
+obj-$(CONFIG_NET_SCH_GKPRIO)   += sch_gkprio.o
 obj-$(CONFIG_NET_SCH_CHOKE)+= sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)  += sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)+= sch_codel.o
diff --git a/net/sched/sch_gkprio.c b/net/sched/sch_gkprio.c
new file mode 100644
index 000..ad1227c
--- /dev/null
+++ b/net/sched/sch_gkprio.c
@@ -0,0 +1,316 @@
+/*
+ * net/sched/sch_gkprio.c  Gatekeeper Priority Queue.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors:Nishanth Devarajan, <ndev_2...@gmail.com>
+ * original idea by Michel Machado, Cody Doucette, and Qiaobin Fu
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* Packets are assigned priorities [0, 63] due to the IP DSCP field limits. */
+#define GKPRIO_MAX_PRIORITY 64
+
+/*   Gatekeeper Priority Queue
+ * =
+ *
+ * This qdisc schedules a packet according to the value (0-63) of its DSCP
+ * (IPv4) or DS (IPv6) field, where a higher value places the packet closer
+ * to the exit of the queue. Non-IP packets are assigned a default priority
+ * specified to GKPRIO; if none is specified, default priority is set
+ * to 0. When the queue is full, the lowest priority packet in the queue is
+ * dropped to make room for the packet to be added if it has higher priority.
+ * If the packet to be added has lower priority than all packets in the queue,
+ * it is dropped.
+ *
+ * Without the Gatekeeper priority queue, queue length limits must be imposed
+ * for individual queues, and there is no easy way to enforce a global queue
+ * length limit across all priorities. With the Gatekeeper queue, a global
+ * queue length limit can be enforced while not restricting the queue lengths
+ * of individual priorities.
+ *
+ * This is especially useful for a denial-of-service defense system; like

Re: [PATCH iproute2/net-next v3]tc: B.W limits can now be specified in %.

2017-11-24 Thread Nishanth Devarajan
On Fri, Nov 24, 2017 at 11:25:28AM -0800, Stephen Hemminger wrote:
> On Sat, 18 Nov 2017 02:13:38 +0530
> Nishanth Devarajan <ndev2...@gmail.com> wrote:
> 
> > This patch adapts the tc command line interface to allow bandwidth limits
> > to be specified as a percentage of the interface's capacity.
> > 
> > Adding this functionality requires passing the specified device string to
> > each class/qdisc which changes the prototype for a couple of functions: the
> > .parse_qopt and .parse_copt interfaces. The device string is a required
> > parameter for tc-qdisc and tc-class, and when not specified, the kernel
> > returns ENODEV. In this patch, if the user tries to specify a bandwidth
> > percentage without naming the device, we return an error from userspace.
> > 
> > v2:
> > * Modified and moved int read_prop() from ip/iptuntap.c to lib/utils.c,
> > to make it accessible to tc. 
> > 
> > v3:
> > * Modified and moved int parse_percent() from tc/q_netem.c to lib/utils.c for
> > use in tc.
> > 
> > * Changed couple variable names in int parse_percent_rate().
> > 
> > * Handled showing error message when device speed is unknown.
> > 
> > * Updated man page to warn users that when specifying rates in %, tc only
> > uses the current device speed and does not recalculate if it changes after.
> > 
> > During cases when properties (like device speed) are unknown, read_prop()
> > assumes that if the property file can be opened but not read, it means
> > that the property is unknown.
> > 
> > Signed-off by: Nishanth Devarajan<ndev2...@gmail.com>
> > 
> 
> Applied, but there were three things that I needed to change:
>   1. The DCO tag is "Signed-off-by" not "Signed-off by"
>   2. The revision history should be below the cut line --- in the mail message
>  so that it doesn't end up in the commit message.
>   3. The qopt function declarations now are a really long line.
>  I will break them up.
>

Thanks for the help, and will do, I'll keep the feedback in mind for
future patches, thanks.

-Nishanth


[PATCH iproute2/net-next v4]tc: B.W limits can now be specified in %.

2017-11-20 Thread Nishanth Devarajan
This patch adapts the tc command line interface to allow bandwidth limits
to be specified as a percentage of the interface's capacity.

Adding this functionality requires passing the specified device string to
each class/qdisc which changes the prototype for a couple of functions: the
.parse_qopt and .parse_copt interfaces. The device string is a required
parameter for tc-qdisc and tc-class, and when not specified, the kernel
returns ENODEV. In this patch, if the user tries to specify a bandwidth
percentage without naming the device, we return an error from userspace.

v2:
* Modified and moved int read_prop() from ip/iptuntap.c to lib/utils.c,
to make it accessible to tc.

v3:
* Modified and moved int parse_percent() from tc/q_netem.c to lib/utils.c for
use in tc.
* Changed couple variable names in int parse_percent_rate().
* Handled showing error message when device speed is unknown.
* Updated man page to warn users that when specifying rates in %, tc only
uses the current device speed and does not recalculate if it changes after.

During cases when properties (like device speed) are unknown, read_prop()
assumes that if the property file can be opened but not read, it means
that the property is unknown.

v4:
* int read_prop() in lib/utils.c was using strtoul() API, this was changed
to strtol()
* 'const' qualifier was added to device string arguments in .parse_qopt
and .parse_copt interface headers

Signed-off by: Nishanth Devarajan<ndev2...@gmail.com>
---
 include/utils.h |  2 ++
 ip/iptuntap.c   | 32 ---
 lib/utils.c | 68 +
 man/man8/tc.8   |  5 -
 tc/q_atm.c  |  4 ++--
 tc/q_cbq.c  | 25 -
 tc/q_cbs.c  |  2 +-
 tc/q_choke.c|  9 ++--
 tc/q_clsact.c   |  2 +-
 tc/q_codel.c|  2 +-
 tc/q_drr.c  |  4 ++--
 tc/q_dsmark.c   |  4 ++--
 tc/q_fifo.c |  2 +-
 tc/q_fq.c   | 16 +++---
 tc/q_fq_codel.c |  2 +-
 tc/q_gred.c |  9 ++--
 tc/q_hfsc.c | 45 +-
 tc/q_hhf.c  |  2 +-
 tc/q_htb.c  | 18 +++
 tc/q_ingress.c  |  2 +-
 tc/q_mqprio.c   |  2 +-
 tc/q_multiq.c   |  2 +-
 tc/q_netem.c| 23 ++-
 tc/q_pie.c  |  2 +-
 tc/q_prio.c |  2 +-
 tc/q_qfq.c  |  4 ++--
 tc/q_red.c  |  9 ++--
 tc/q_rr.c   |  2 +-
 tc/q_sfb.c  |  2 +-
 tc/q_sfq.c  |  2 +-
 tc/q_tbf.c  | 16 +++---
 tc/tc.c |  2 +-
 tc/tc_class.c   |  2 +-
 tc/tc_qdisc.c   |  2 +-
 tc/tc_util.c| 63 
 tc/tc_util.h|  7 --
 36 files changed, 285 insertions(+), 112 deletions(-)

diff --git a/include/utils.h b/include/utils.h
index 10749fb..9c37c61 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -88,6 +88,8 @@ int get_prefix(inet_prefix *dst, char *arg, int family);
 int mask2bits(__u32 netmask);
 int get_addr_ila(__u64 *val, const char *arg);
 
+int read_prop(const char *dev, char *prop, long *value);
+int parse_percent(double *val, const char *str);
 int get_hex(char c);
 int get_integer(int *val, const char *arg, int base);
 int get_unsigned(unsigned *val, const char *arg, int base);
diff --git a/ip/iptuntap.c b/ip/iptuntap.c
index b46e452..09f2be2 100644
--- a/ip/iptuntap.c
+++ b/ip/iptuntap.c
@@ -223,38 +223,6 @@ static int do_del(int argc, char **argv)
return tap_del_ioctl();
 }
 
-static int read_prop(char *dev, char *prop, long *value)
-{
-   char fname[IFNAMSIZ+25], buf[80], *endp;
-   ssize_t len;
-   int fd;
-   long result;
-
-   sprintf(fname, "/sys/class/net/%s/%s", dev, prop);
-   fd = open(fname, O_RDONLY);
-   if (fd < 0) {
-   if (strcmp(prop, "tun_flags"))
-   fprintf(stderr, "open %s: %s\n", fname,
-   strerror(errno));
-   return -1;
-   }
-   len = read(fd, buf, sizeof(buf)-1);
-   close(fd);
-   if (len < 0) {
-   fprintf(stderr, "read %s: %s", fname, strerror(errno));
-   return -1;
-   }
-
-   buf[len] = 0;
-   result = strtol(buf, &endp, 0);
-   if (*endp != '\n') {
-   fprintf(stderr, "Failed to parse %s\n", fname);
-   return -1;
-   }
-   *value = result;
-   return 0;
-}
-
 static void print_flags(long flags)
 {
if (flags & IFF_TUN)
diff --git a/lib/utils.c b/lib/utils.c
index 48cead1..7ced8c0 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -38,6 +38,74 @@
 int resolve_hosts;
 int timestamp_short;
 
+int read_prop(const char *dev, char *prop, long *value)
+{
+   char fname[128], buf[80], *endp, *nl;
+   FILE *fp;
+   long result;
+   int ret;
+
+   ret = snprintf(fname, sizeof(fname), "/sys/class/net/%s/%s",
+   dev, prop);
+
+   if (ret <= 0 || ret >= sizeof(

[PATCH iproute2/net-next v3]tc: B.W limits can now be specified in %.

2017-11-17 Thread Nishanth Devarajan
This patch adapts the tc command line interface to allow bandwidth limits
to be specified as a percentage of the interface's capacity.

Adding this functionality requires passing the specified device string to
each class/qdisc which changes the prototype for a couple of functions: the
.parse_qopt and .parse_copt interfaces. The device string is a required
parameter for tc-qdisc and tc-class, and when not specified, the kernel
returns ENODEV. In this patch, if the user tries to specify a bandwidth
percentage without naming the device, we return an error from userspace.

v2:
* Modified and moved int read_prop() from ip/iptuntap.c to lib/utils.c,
to make it accessible to tc. 

v3:
* Modified and moved int parse_percent() from tc/q_netem.c to lib/utils.c for
use in tc.

* Changed couple variable names in int parse_percent_rate().

* Handled showing error message when device speed is unknown.

* Updated man page to warn users that when specifying rates in %, tc only
uses the current device speed and does not recalculate if it changes after.

During cases when properties (like device speed) are unknown, read_prop()
assumes that if the property file can be opened but not read, it means
that the property is unknown.

Signed-off by: Nishanth Devarajan<ndev2...@gmail.com>

---
 include/utils.h |  2 ++
 ip/iptuntap.c   | 32 ---
 lib/utils.c | 68 +
 man/man8/tc.8   |  5 -
 tc/q_atm.c  |  2 +-
 tc/q_cbq.c  | 25 -
 tc/q_choke.c|  9 ++--
 tc/q_clsact.c   |  2 +-
 tc/q_codel.c|  2 +-
 tc/q_drr.c  |  4 ++--
 tc/q_dsmark.c   |  4 ++--
 tc/q_fifo.c |  2 +-
 tc/q_fq.c   | 16 +++---
 tc/q_fq_codel.c |  2 +-
 tc/q_gred.c |  9 ++--
 tc/q_hfsc.c | 45 +-
 tc/q_hhf.c  |  2 +-
 tc/q_htb.c  | 18 +++
 tc/q_ingress.c  |  2 +-
 tc/q_mqprio.c   |  2 +-
 tc/q_multiq.c   |  2 +-
 tc/q_netem.c| 23 ++-
 tc/q_pie.c  |  2 +-
 tc/q_prio.c |  2 +-
 tc/q_qfq.c  |  4 ++--
 tc/q_red.c  |  9 ++--
 tc/q_rr.c   |  2 +-
 tc/q_sfb.c  |  2 +-
 tc/q_sfq.c  |  2 +-
 tc/q_tbf.c  | 16 +++---
 tc/tc.c |  2 +-
 tc/tc_class.c   |  2 +-
 tc/tc_qdisc.c   |  2 +-
 tc/tc_util.c| 63 
 tc/tc_util.h|  7 --
 35 files changed, 283 insertions(+), 110 deletions(-)

diff --git a/include/utils.h b/include/utils.h
index 3d91c50..9377266 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -87,6 +87,8 @@ int get_prefix(inet_prefix *dst, char *arg, int family);
 int mask2bits(__u32 netmask);
 int get_addr_ila(__u64 *val, const char *arg);
 
+int read_prop(const char *dev, char *prop, long *value);
+int parse_percent(double *val, const char *str);
 int get_hex(char c);
 int get_integer(int *val, const char *arg, int base);
 int get_unsigned(unsigned *val, const char *arg, int base);
diff --git a/ip/iptuntap.c b/ip/iptuntap.c
index b46e452..09f2be2 100644
--- a/ip/iptuntap.c
+++ b/ip/iptuntap.c
@@ -223,38 +223,6 @@ static int do_del(int argc, char **argv)
return tap_del_ioctl();
 }
 
-static int read_prop(char *dev, char *prop, long *value)
-{
-   char fname[IFNAMSIZ+25], buf[80], *endp;
-   ssize_t len;
-   int fd;
-   long result;
-
-   sprintf(fname, "/sys/class/net/%s/%s", dev, prop);
-   fd = open(fname, O_RDONLY);
-   if (fd < 0) {
-   if (strcmp(prop, "tun_flags"))
-   fprintf(stderr, "open %s: %s\n", fname,
-   strerror(errno));
-   return -1;
-   }
-   len = read(fd, buf, sizeof(buf)-1);
-   close(fd);
-   if (len < 0) {
-   fprintf(stderr, "read %s: %s", fname, strerror(errno));
-   return -1;
-   }
-
-   buf[len] = 0;
-   result = strtol(buf, &endp, 0);
-   if (*endp != '\n') {
-   fprintf(stderr, "Failed to parse %s\n", fname);
-   return -1;
-   }
-   *value = result;
-   return 0;
-}
-
 static void print_flags(long flags)
 {
if (flags & IFF_TUN)
diff --git a/lib/utils.c b/lib/utils.c
index 4f2fa28..9d5ba2a 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -39,6 +39,74 @@
 int resolve_hosts;
 int timestamp_short;
 
+int read_prop(const char *dev, char *prop, long *value)
+{
+   char fname[128], buf[80], *endp, *nl;
+   FILE *fp;
+   long result;
+   int ret;
+
+   ret = snprintf(fname, sizeof(fname), "/sys/class/net/%s/%s",
+   dev, prop);
+
+   if (ret <= 0 || ret >= sizeof(fname)) {
+   fprintf(stderr, "could not build pathname for property\n");
+   return -1;
+   }
+
+   fp = fopen(fname, "r");
+   if (fp == NULL) 

[PATCH iproute2/net-next v2]tc: B.W limits can now be specified in %.

2017-11-14 Thread Nishanth Devarajan
This patch adapts the tc command line interface to allow bandwidth limits
to be specified as a percentage of the interface's capacity.

For this purpose, we've modified and moved int read_prop() from
ip/iptuntap.c to lib/utils.c to make it accessible to tc.

Additionally, adding this functionality requires passing the specified
device string to each class/qdisc which changes the prototype for a
couple of functions: the .parse_qopt and .parse_copt interfaces. The
device string is a required parameter for tc-qdisc and tc-class, and when
not specified, the kernel returns ENODEV. In this patch, if the user tries
to specify a bandwidth percentage without naming the device, we return an
error from userspace.

Signed-off by: Nishanth Devarajan <ndev2...@gmail.com>
---
 include/utils.h |  1 +
 ip/iptuntap.c   | 32 
 lib/utils.c | 51 
 man/man8/tc.8   |  4 +++-
 tc/q_atm.c  |  2 +-
 tc/q_cbq.c  | 25 +-
 tc/q_choke.c|  9 ++--
 tc/q_clsact.c   |  2 +-
 tc/q_codel.c|  2 +-
 tc/q_drr.c  |  4 ++--
 tc/q_dsmark.c   |  4 ++--
 tc/q_fifo.c |  2 +-
 tc/q_fq.c   | 16 +++---
 tc/q_fq_codel.c |  2 +-
 tc/q_gred.c |  9 ++--
 tc/q_hfsc.c | 45 ++-
 tc/q_hhf.c  |  2 +-
 tc/q_htb.c  | 18 
 tc/q_ingress.c  |  2 +-
 tc/q_mqprio.c   |  2 +-
 tc/q_multiq.c   |  2 +-
 tc/q_netem.c|  9 ++--
 tc/q_pie.c  |  2 +-
 tc/q_prio.c |  2 +-
 tc/q_qfq.c  |  4 ++--
 tc/q_red.c  |  9 ++--
 tc/q_rr.c   |  2 +-
 tc/q_sfb.c  |  2 +-
 tc/q_sfq.c  |  2 +-
 tc/q_tbf.c  | 16 +++---
 tc/tc.c |  2 +-
 tc/tc_class.c   |  2 +-
 tc/tc_qdisc.c   |  2 +-
 tc/tc_util.c| 66 +
 tc/tc_util.h|  8 +--
 35 files changed, 268 insertions(+), 96 deletions(-)

diff --git a/include/utils.h b/include/utils.h
index 3d91c50..63fea7c 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -87,6 +87,7 @@ int get_prefix(inet_prefix *dst, char *arg, int family);
 int mask2bits(__u32 netmask);
 int get_addr_ila(__u64 *val, const char *arg);
 
+int read_prop(char *dev, char *prop, long *value);
 int get_hex(char c);
 int get_integer(int *val, const char *arg, int base);
 int get_unsigned(unsigned *val, const char *arg, int base);
diff --git a/ip/iptuntap.c b/ip/iptuntap.c
index b46e452..09f2be2 100644
--- a/ip/iptuntap.c
+++ b/ip/iptuntap.c
@@ -223,38 +223,6 @@ static int do_del(int argc, char **argv)
return tap_del_ioctl();
 }
 
-static int read_prop(char *dev, char *prop, long *value)
-{
-   char fname[IFNAMSIZ+25], buf[80], *endp;
-   ssize_t len;
-   int fd;
-   long result;
-
-   sprintf(fname, "/sys/class/net/%s/%s", dev, prop);
-   fd = open(fname, O_RDONLY);
-   if (fd < 0) {
-   if (strcmp(prop, "tun_flags"))
-   fprintf(stderr, "open %s: %s\n", fname,
-   strerror(errno));
-   return -1;
-   }
-   len = read(fd, buf, sizeof(buf)-1);
-   close(fd);
-   if (len < 0) {
-   fprintf(stderr, "read %s: %s", fname, strerror(errno));
-   return -1;
-   }
-
-   buf[len] = 0;
-   result = strtol(buf, &endp, 0);
-   if (*endp != '\n') {
-   fprintf(stderr, "Failed to parse %s\n", fname);
-   return -1;
-   }
-   *value = result;
-   return 0;
-}
-
 static void print_flags(long flags)
 {
if (flags & IFF_TUN)
diff --git a/lib/utils.c b/lib/utils.c
index 4f2fa28..1332410 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -39,6 +39,57 @@
 int resolve_hosts;
 int timestamp_short;
 
+int read_prop(char *dev, char *prop, long *value)
+{
+   char fname[128], buf[80], *endp, *nl;
+   FILE *fp;
+   long result;
+   int ret;
+
+   ret = snprintf(fname, sizeof(fname), "/sys/class/net/%s/%s",
+   dev, prop);
+
+   if (ret <= 0 || ret >= sizeof(fname)) {
+   fprintf(stderr, "could not build pathname for property\n");
+   return -1;
+   }
+
+   fp = fopen(fname, "r");
+   if (fp == NULL) {
+   fprintf(stderr, "fopen %s: %s\n", fname, strerror(errno));
+   return -1;
+   }
+
+   if (!fgets(buf, sizeof(buf), fp)) {
+   fclose(fp);
+   goto out;
+   }
+
+   nl = strchr(buf, '\n');
+   if (nl)
+   *nl = '\0';
+
+   fclose(fp);
+   result = strtoul(buf, &endp, 0);
+
+   if (buf == endp || *endp) {
+   fprintf(stderr, "value \"%s\" in file %s is not a number\n",
+   buf, fname);
+   goto out;
+   }
+
+   if (result ==

[PATCH iproute2/net-next]tc: B.W limits can now be specified in %

2017-10-28 Thread Nishanth Devarajan
This patch adapts the tc command line interface to allow bandwidth limits
to be specified as a percentage of the interface's capacity.

For this purpose, we move read_prop() from ip/iptuntap.c to
lib/utils.c to make it accessible to tc.

Additionally, adding this functionality requires passing the specified
device string to each class/qdisc which changes the prototype for a
couple of functions: the .parse_qopt and .parse_copt interfaces. The
device string is a required parameter for tc-qdisc and tc-class, and when
not specified, the kernel returns ENODEV. In this patch, if the user tries
to specify a bandwidth percentage without naming the device, we return an
error from userspace.

Signed-off-by: Nishanth Devarajan <ndev2...@gmail.com>
---
 include/utils.h |  1 +
 ip/iptuntap.c   | 32 
 lib/utils.c | 32 
 man/man8/tc.8   |  4 +++-
 tc/q_atm.c  |  2 +-
 tc/q_cbq.c  | 25 -
 tc/q_choke.c|  9 +++--
 tc/q_clsact.c   |  2 +-
 tc/q_codel.c|  2 +-
 tc/q_drr.c  |  4 ++--
 tc/q_dsmark.c   |  4 ++--
 tc/q_fifo.c |  2 +-
 tc/q_fq.c   | 16 +---
 tc/q_fq_codel.c |  2 +-
 tc/q_gred.c |  9 +++--
 tc/q_hfsc.c | 45 ++---
 tc/q_hhf.c  |  2 +-
 tc/q_htb.c  | 18 ++
 tc/q_ingress.c  |  2 +-
 tc/q_mqprio.c   |  2 +-
 tc/q_multiq.c   |  2 +-
 tc/q_netem.c|  9 +++--
 tc/q_pie.c  |  2 +-
 tc/q_prio.c |  2 +-
 tc/q_qfq.c  |  4 ++--
 tc/q_red.c  |  9 +++--
 tc/q_rr.c   |  2 +-
 tc/q_sfb.c  |  2 +-
 tc/q_sfq.c  |  2 +-
 tc/q_tbf.c  | 16 +---
 tc/tc.c |  2 +-
 tc/tc_class.c   |  2 +-
 tc/tc_qdisc.c   |  2 +-
 tc/tc_util.c| 54 ++
 tc/tc_util.h|  8 ++--
 35 files changed, 237 insertions(+), 96 deletions(-)

diff --git a/include/utils.h b/include/utils.h
index 3d91c50..63fea7c 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -87,6 +87,7 @@ int get_prefix(inet_prefix *dst, char *arg, int family);
 int mask2bits(__u32 netmask);
 int get_addr_ila(__u64 *val, const char *arg);
 
+int read_prop(char *dev, char *prop, long *value);
 int get_hex(char c);
 int get_integer(int *val, const char *arg, int base);
 int get_unsigned(unsigned *val, const char *arg, int base);
diff --git a/ip/iptuntap.c b/ip/iptuntap.c
index b46e452..09f2be2 100644
--- a/ip/iptuntap.c
+++ b/ip/iptuntap.c
@@ -223,38 +223,6 @@ static int do_del(int argc, char **argv)
return tap_del_ioctl();
 }
 
-static int read_prop(char *dev, char *prop, long *value)
-{
-   char fname[IFNAMSIZ+25], buf[80], *endp;
-   ssize_t len;
-   int fd;
-   long result;
-
-   sprintf(fname, "/sys/class/net/%s/%s", dev, prop);
-   fd = open(fname, O_RDONLY);
-   if (fd < 0) {
-   if (strcmp(prop, "tun_flags"))
-   fprintf(stderr, "open %s: %s\n", fname,
-   strerror(errno));
-   return -1;
-   }
-   len = read(fd, buf, sizeof(buf)-1);
-   close(fd);
-   if (len < 0) {
-   fprintf(stderr, "read %s: %s", fname, strerror(errno));
-   return -1;
-   }
-
-   buf[len] = 0;
-   result = strtol(buf, &endp, 0);
-   if (*endp != '\n') {
-   fprintf(stderr, "Failed to parse %s\n", fname);
-   return -1;
-   }
-   *value = result;
-   return 0;
-}
-
 static void print_flags(long flags)
 {
if (flags & IFF_TUN)
diff --git a/lib/utils.c b/lib/utils.c
index ac155bf..444c978 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -39,6 +39,38 @@
 
 int timestamp_short;
 
+int read_prop(char *dev, char *prop, long *value)
+{
+   char fname[41], buf[80], *endp;
+   ssize_t len;
+   int fd;
+   long result;
+
+   sprintf(fname, "/sys/class/net/%s/%s", dev, prop);
+   fd = open(fname, O_RDONLY);
+   if (fd < 0) {
+   if (strcmp(prop, "tun_flags"))
+   fprintf(stderr, "open %s: %s\n", fname,
+   strerror(errno));
+   return -1;
+   }
+   len = read(fd, buf, sizeof(buf)-1);
+   close(fd);
+   if (len < 0) {
+   fprintf(stderr, "read %s: %s", fname, strerror(errno));
+   return -1;
+   }
+
+   buf[len] = 0;
+   result = strtol(buf, &endp, 0);
+   if (*endp != '\n') {
+   fprintf(stderr, "Failed to parse %s\n", fname);
+   return -1;
+   }
+   *value = result;
+   return 0;
+}
+
 int get_hex(char c)
 {
if (c >= 'A' && c <= 'F')
diff --git a/man/man8/tc.8 b/man/man8/tc.8
index f96911a..22f699b 100644
--- a/man/man8/tc.8
+++ b/man/man8/tc.8
@@ -443,7 +443,9 @@ s