dscp ranges
----------
This property controls which dscp values the processes in a cgroup are
allowed to use. A process in a cgroup will receive an EACCES error if it
tries to do any of these things:
* set a socket's IP_TOS option (or IPv6 traffic class) to a value whose
  dscp field (bits 7:2) is outside the allowed ranges
* use a socket to send a message in which the IP_TOS (or IPv6 traffic
  class) ancillary data is set to a value whose dscp field is outside the
  allowed ranges

This property is exposed to userspace through the 'net.dscp_ranges' file,
similar to the bind and listen port ranges.

Tested: wrote a Python script that attempts to set the IP_TOS socket
option to a value with an out-of-range dscp field and expects the call to
fail

Signed-off-by: Anoop Naravaram <anarava...@google.com>
---
 Documentation/cgroup-v1/net.txt | 14 ++++++++++++++
 include/net/net_cgroup.h        |  6 ++++++
 net/core/net_cgroup.c           | 34 ++++++++++++++++++++++++++++++++--
 net/ipv4/ip_sockglue.c          | 13 +++++++++++++
 net/ipv6/datagram.c             |  9 +++++++++
 net/ipv6/ipv6_sockglue.c        |  8 ++++++++
 6 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/Documentation/cgroup-v1/net.txt b/Documentation/cgroup-v1/net.txt
index a14fd1c..ea2f1db 100644
--- a/Documentation/cgroup-v1/net.txt
+++ b/Documentation/cgroup-v1/net.txt
@@ -30,6 +30,20 @@ This property is exposed to userspace through the 'net.listen_port_ranges' file,
 as ranges of ports that the processes can listen on (as described in the HOW TO
 INTERACT WITH RANGES FILES section).
 
+dscp ranges
+-----------
+This property controls which dscp values the processes in a cgroup are
+allowed to use. A process in a cgroup will receive an EACCES error if it
+tries to do any of these things:
+* set a socket's IP_TOS option (or IPv6 traffic class) to a value whose
+  dscp field (bits 7:2) is outside the allowed ranges
+* send a message in which the IP_TOS (or IPv6 traffic class) ancillary
+  data is set to a value whose dscp field is outside the allowed ranges
+
+This property is exposed to userspace through the 'net.dscp_ranges' file, as
+ranges of dscp values that the process can use (as described in the HOW TO
+INTERACT WITH RANGES FILES section).
+
 udp port usage and limit
 ------------------------
 This property controls the limit of udp ports that can be used by the
diff --git a/include/net/net_cgroup.h b/include/net/net_cgroup.h
index 25a9def..d89e98d 100644
--- a/include/net/net_cgroup.h
+++ b/include/net/net_cgroup.h
@@ -23,6 +23,7 @@
 enum {
        NETCG_LISTEN_RANGES,
        NETCG_BIND_RANGES,
+       NETCG_DSCP_RANGES,
        NETCG_NUM_RANGE_TYPES
 };
 
@@ -73,6 +74,7 @@ struct net_cgroup {
 
 bool net_cgroup_bind_allowed(u16 port);
 bool net_cgroup_listen_allowed(u16 port);
+bool net_cgroup_dscp_allowed(u8 dscp);
 bool net_cgroup_acquire_udp_port(void);
 void net_cgroup_release_udp_port(void);
 
@@ -85,6 +87,10 @@ static inline bool net_cgroup_listen_allowed(u16 port)
 {
        return true;
 }
+static inline bool net_cgroup_dscp_allowed(u8 dscp)
+{
+       return true;
+}
 static inline bool net_cgroup_acquire_udp_port(void)
 {
        return true;
diff --git a/net/core/net_cgroup.c b/net/core/net_cgroup.c
index 2f58e13..73dc5e7 100644
--- a/net/core/net_cgroup.c
+++ b/net/core/net_cgroup.c
@@ -21,6 +21,9 @@
 #define MIN_PORT_VALUE         0
 #define MAX_PORT_VALUE         65535
 
+#define MIN_DSCP_VALUE         0
+#define MAX_DSCP_VALUE         63
+
 /* Deriving MAX_ENTRIES from MAX_WRITE_SIZE as a rough estimate */
 #define MAX_ENTRIES ((MAX_WRITE_SIZE - offsetof(struct net_ranges, range)) /   \
                     BYTES_PER_ENTRY)
@@ -161,7 +164,10 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
                                MIN_PORT_VALUE, MAX_PORT_VALUE) ||
                    alloc_init_net_ranges(
                                &netcg->whitelists[NETCG_LISTEN_RANGES],
-                               MIN_PORT_VALUE, MAX_PORT_VALUE)) {
+                               MIN_PORT_VALUE, MAX_PORT_VALUE) ||
+                   alloc_init_net_ranges(
+                               &netcg->whitelists[NETCG_DSCP_RANGES],
+                               MIN_DSCP_VALUE, MAX_DSCP_VALUE)) {
                        free_net_cgroup(netcg);
                        /* if any of these cause an error, return ENOMEM */
                        return ERR_PTR(-ENOMEM);
@@ -178,7 +184,11 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
                    alloc_copy_net_ranges(
                                &netcg->whitelists[NETCG_LISTEN_RANGES],
                                MIN_PORT_VALUE, MAX_PORT_VALUE,
-                               &parent_netcg->whitelists[NETCG_LISTEN_RANGES])) {
+                               &parent_netcg->whitelists[NETCG_LISTEN_RANGES]) ||
+                   alloc_copy_net_ranges(
+                               &netcg->whitelists[NETCG_DSCP_RANGES],
+                               MIN_DSCP_VALUE, MAX_DSCP_VALUE,
+                               &parent_netcg->whitelists[NETCG_DSCP_RANGES])) {
                        free_net_cgroup(netcg);
                        /* if any of these cause an error, return ENOMEM */
                        return ERR_PTR(-ENOMEM);
@@ -237,6 +247,12 @@ bool net_cgroup_listen_allowed(u16 port)
 }
 EXPORT_SYMBOL_GPL(net_cgroup_listen_allowed);
 
+bool net_cgroup_dscp_allowed(u8 dscp)
+{
+       return net_cgroup_value_allowed(dscp, NETCG_DSCP_RANGES);
+}
+EXPORT_SYMBOL_GPL(net_cgroup_dscp_allowed);
+
 static s64 net_udp_read_s64(struct cgroup_subsys_state *css, struct cftype *cft)
 {
        struct  net_cgroup *netcg = css_to_net_cgroup(css);
@@ -634,6 +650,20 @@ static struct cftype ss_files[] = {
                .max_write_len  = MAX_WRITE_SIZE,
        },
        {
+               .name           = "dscp_ranges",
+               .flags          = CFTYPE_ONLY_ON_ROOT,
+               .seq_show       = net_read_ranges,
+               .private        = NETCG_DSCP_RANGES,
+       },
+       {
+               .name           = "dscp_ranges",
+               .flags          = CFTYPE_NOT_ON_ROOT,
+               .seq_show       = net_read_ranges,
+               .write          = net_write_ranges,
+               .private        = NETCG_DSCP_RANGES,
+               .max_write_len  = MAX_WRITE_SIZE,
+       },
+       {
                .name           = "udp_limit",
                .flags          = CFTYPE_ONLY_ON_ROOT,
                .read_s64       = net_udp_read_s64,
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 71a52f4d..71a4297 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -42,6 +42,7 @@
 #include <net/transp_v6.h>
 #endif
 #include <net/ip_fib.h>
+#include <net/net_cgroup.h>
 
 #include <linux/errqueue.h>
 #include <asm/uaccess.h>
@@ -289,6 +290,11 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
                        val = *(int *)CMSG_DATA(cmsg);
                        if (val < 0 || val > 255)
                                return -EINVAL;
+                       /* val is 8-bit tos, we need to rightshift 2 to get the
+                        * 6-bit dscp field
+                        */
+                       if (!net_cgroup_dscp_allowed(val >> 2))
+                               return -EACCES;
                        ipc->tos = val;
                        ipc->priority = rt_tos2priority(ipc->tos);
                        break;
@@ -727,6 +733,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
                        val &= ~INET_ECN_MASK;
                        val |= inet->tos & INET_ECN_MASK;
                }
+               /* val is 8-bit tos, we need to rightshift 2 to get the
+                * 6-bit dscp field
+                */
+               if (!net_cgroup_dscp_allowed(val >> 2)) {
+                       err = -EACCES;
+                       break;
+               }
                if (inet->tos != val) {
                        inet->tos = val;
                        sk->sk_priority = rt_tos2priority(val);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 37874e2..9053b83 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -34,6 +34,7 @@
 
 #include <linux/errqueue.h>
 #include <asm/uaccess.h>
+#include <net/net_cgroup.h>
 
 static bool ipv6_mapped_addr_any(const struct in6_addr *a)
 {
@@ -973,6 +974,14 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
                        if (tc < -1 || tc > 0xff)
                                goto exit_f;
 
+                       /* tc is 8-bit tclass, we need to rightshift 2 to get
+                        * the 6-bit dscp field
+                        */
+                       if (!net_cgroup_dscp_allowed(tc >> 2)) {
+                               err = -EACCES;
+                               goto exit_f;
+                       }
+
                        err = 0;
                        ipc6->tclass = tc;
 
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a9895e1..eac3f88 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -52,6 +52,7 @@
 #include <net/udplite.h>
 #include <net/xfrm.h>
 #include <net/compat.h>
+#include <net/net_cgroup.h>
 
 #include <asm/uaccess.h>
 
@@ -339,6 +340,13 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
                /* RFC 3542, 6.5: default traffic class of 0x0 */
                if (val == -1)
                        val = 0;
+               /* val is 8-bit tclass, we need to rightshift 2 to get the 6-bit
+                * dscp field
+                */
+               if (!net_cgroup_dscp_allowed(val >> 2)) {
+                       retv = -EACCES;
+                       break;
+               }
                np->tclass = val;
                retv = 0;
                break;
-- 
2.8.0.rc3.226.g39d4020

Reply via email to