This patch provides the extensions for CKRM to track per socket classes.
This is the base to enable socket based resource control for inbound
connection control, bandwidth control etc.

Signed-Off-By: Vivek Kashyap <[EMAIL PROTECTED]>
Signed-Off-By: Gerrit Huizenga <[EMAIL PROTECTED]>
Signed-off-by: Nishanth Aravamudan <[EMAIL PROTECTED]>

Index: linux-2.6.12-rc1/fs/rcfs/Makefile
===================================================================
--- linux-2.6.12-rc1.orig/fs/rcfs/Makefile      2005-03-18 15:16:33.370482769 
-0800
+++ linux-2.6.12-rc1/fs/rcfs/Makefile   2005-03-18 15:16:37.387163297 -0800
@@ -6,3 +6,4 @@ obj-$(CONFIG_RCFS_FS) += rcfs.o 
 
 rcfs-y := super.o inode.o dir.o rootdir.o magic.o
 rcfs-$(CONFIG_CKRM_TYPE_TASKCLASS) += tc_magic.o
+rcfs-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += socket_fs.o
Index: linux-2.6.12-rc1/fs/rcfs/rootdir.c
===================================================================
--- linux-2.6.12-rc1.orig/fs/rcfs/rootdir.c     2005-03-18 15:16:33.372482610 
-0800
+++ linux-2.6.12-rc1/fs/rcfs/rootdir.c  2005-03-18 15:16:37.387163297 -0800
@@ -187,6 +187,10 @@ EXPORT_SYMBOL_GPL(rcfs_deregister_classt
 extern struct rcfs_mfdesc tc_mfdesc;
 #endif
 
+#ifdef CONFIG_CKRM_TYPE_SOCKETCLASS
+extern struct rcfs_mfdesc rcfs_sock_mfdesc;
+#endif
+
 /* Common root and magic file entries.
  * root name, root permissions, magic file names and magic file permissions 
  * are needed by all entities (classtypes and classification engines) existing 
@@ -203,4 +207,10 @@ struct rcfs_mfdesc *genmfdesc[CKRM_MAX_C
 #else
        NULL,
 #endif
+#ifdef CONFIG_CKRM_TYPE_SOCKETCLASS
+       &rcfs_sock_mfdesc,
+#else
+       NULL,
+#endif
+
 };
Index: linux-2.6.12-rc1/fs/rcfs/socket_fs.c
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.12-rc1/fs/rcfs/socket_fs.c        2005-03-18 15:16:37.391162979 
-0800
@@ -0,0 +1,280 @@
+/* ckrm_socketaq.c 
+ *
+ * Copyright (C) Vivek Kashyap,      IBM Corp. 2004
+ * 
+ * Latest version, more details at http://ckrm.sf.net
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+/*******************************************************************************
+ *  Socket class type
+ *   
+ * Defines the root structure for socket based classes. Currently only inbound
+ * connection control is supported based on prioritized accept queues. 
+ 
******************************************************************************/
+
+#include <linux/rcfs.h>
+#include <net/tcp.h>
+
+extern int rcfs_create_noperm(struct inode *, struct dentry *, int,
+                      struct nameidata *);
+extern int rcfs_symlink_noperm(struct inode *, struct dentry *, const char *);
+extern int rcfs_mkdir_noperm(struct inode *, struct dentry *, int);
+extern int rcfs_rmdir_noperm(struct inode *, struct dentry *);
+extern int rcfs_link_noperm(struct dentry *, struct inode *, struct dentry *);
+extern int rcfs_unlink_noperm(struct inode *, struct dentry *);
+extern int rcfs_mknod_noperm(struct inode *, struct dentry *, int mode, dev_t);
+
+extern int rcfs_rmdir(struct inode *, struct dentry *);
+extern int rcfs_unlink(struct inode *, struct dentry *);
+extern int rcfs_rename(struct inode *, struct dentry *, struct inode *,
+                      struct dentry *);
+
+extern int rcfs_create_coredir(struct inode *, struct dentry *);
+
+int rcfs_sock_mkdir(struct inode *, struct dentry *, int mode);
+int rcfs_sock_rmdir(struct inode *, struct dentry *);
+struct inode_operations my_iops;
+struct inode_operations class_iops;
+struct inode_operations sub_iops;
+ 
+
+struct rcfs_magf def_magf = {
+       .mode = RCFS_DEFAULT_DIR_MODE,
+       .i_op = &sub_iops,
+       .i_fop = NULL,
+};
+
+struct rcfs_magf rcfs_sock_rootdesc[] = {
+       {
+        /* .name = should not be set, copy from classtype name, */
+        .mode = RCFS_DEFAULT_DIR_MODE,
+        .i_op = &my_iops,
+        /* .i_fop   = &simple_dir_operations, */
+        .i_fop = NULL,
+        },
+       {
+        .name = "members",
+        .mode = RCFS_DEFAULT_FILE_MODE,
+        .i_op = &my_iops,
+        .i_fop = &members_fileops,
+        },
+       {
+        .name = "target",
+        .mode = RCFS_DEFAULT_FILE_MODE,
+        .i_op = &my_iops,
+        .i_fop = &target_fileops,
+        },
+       {
+        .name = "reclassify",
+        .mode = RCFS_DEFAULT_FILE_MODE,
+        .i_op = &my_iops,
+        .i_fop = &reclassify_fileops,
+        },
+};
+
+struct rcfs_magf rcfs_sock_magf[] = {
+       {
+        .name = "config",
+        .mode = RCFS_DEFAULT_FILE_MODE,
+        .i_op = &my_iops,
+        .i_fop = &config_fileops,
+        },
+       {
+        .name = "members",
+        .mode = RCFS_DEFAULT_FILE_MODE,
+        .i_op = &my_iops,
+        .i_fop = &members_fileops,
+        },
+       {
+        .name = "shares",
+        .mode = RCFS_DEFAULT_FILE_MODE,
+        .i_op = &my_iops,
+        .i_fop = &shares_fileops,
+        },
+       {
+        .name = "stats",
+        .mode = RCFS_DEFAULT_FILE_MODE,
+        .i_op = &my_iops,
+        .i_fop = &stats_fileops,
+        },
+       {
+        .name = "target",
+        .mode = RCFS_DEFAULT_FILE_MODE,
+        .i_op = &my_iops,
+        .i_fop = &target_fileops,
+        },
+};
+
+struct rcfs_magf sub_magf[] = {
+       {
+        .name = "config",
+        .mode = RCFS_DEFAULT_FILE_MODE,
+        .i_op = &my_iops,
+        .i_fop = &config_fileops,
+        },
+       {
+        .name = "shares",
+        .mode = RCFS_DEFAULT_FILE_MODE,
+        .i_op = &my_iops,
+        .i_fop = &shares_fileops,
+        },
+       {
+        .name = "stats",
+        .mode = RCFS_DEFAULT_FILE_MODE,
+        .i_op = &my_iops,
+        .i_fop = &stats_fileops,
+        },
+};
+
+struct rcfs_mfdesc rcfs_sock_mfdesc = {
+       .rootmf = rcfs_sock_rootdesc,
+       .rootmflen = (sizeof(rcfs_sock_rootdesc) / sizeof(struct rcfs_magf)),
+};
+
+#define SOCK_MAX_MAGF (sizeof(rcfs_sock_magf)/sizeof(struct rcfs_magf))
+#define LAQ_MAX_SUBMAGF (sizeof(sub_magf)/sizeof(struct rcfs_magf))
+
+int rcfs_sock_rmdir(struct inode *p, struct dentry *me)
+{
+       struct dentry *mftmp, *mfdentry;
+       int ret = 0;
+
+       /* delete all magic sub directories */
+       list_for_each_entry_safe(mfdentry, mftmp, &me->d_subdirs, d_child) {
+               if (S_ISDIR(mfdentry->d_inode->i_mode)) {
+                       ret = rcfs_rmdir(me->d_inode, mfdentry);
+                       if (ret)
+                               return ret;
+               }
+       }
+       /* delete ourselves */
+       ret = rcfs_rmdir(p, me);
+
+       return ret;
+}
+
+#ifdef NUM_ACCEPT_QUEUES
+#define LAQ_NUM_ACCEPT_QUEUES NUM_ACCEPT_QUEUES
+#else
+#define LAQ_NUM_ACCEPT_QUEUES 0
+#endif
+
+int rcfs_sock_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+       int retval = 0;
+       int i, j;
+       struct dentry *pentry, *mfdentry;
+
+       if (rcfs_mknod(dir, dentry, mode | S_IFDIR, 0)) {
+               printk(KERN_ERR "rcfs_sock_mkdir: error reaching parent\n");
+               return retval;
+       }
+       /* Needed if only rcfs_mknod is used instead of i_op->mkdir */
+       dir->i_nlink++;
+
+       retval = rcfs_create_coredir(dir, dentry);
+       if (retval)
+               goto mkdir_err;
+
+       /* create the default set of magic files */
+       for (i = 0; i < SOCK_MAX_MAGF; i++) {
+               mfdentry = rcfs_create_internal(dentry, &rcfs_sock_magf[i], 0);
+               mfdentry->d_fsdata = &RCFS_IS_MAGIC;
+               rcfs_get_inode_info(mfdentry->d_inode)->core = 
+                       rcfs_get_inode_info(dentry->d_inode)->core;
+               rcfs_get_inode_info(mfdentry->d_inode)->mfdentry = mfdentry;
+               if (rcfs_sock_magf[i].i_fop)
+                       mfdentry->d_inode->i_fop = rcfs_sock_magf[i].i_fop;
+               if (rcfs_sock_magf[i].i_op)
+                       mfdentry->d_inode->i_op = rcfs_sock_magf[i].i_op;
+       }
+
+       for (i = 1; i < LAQ_NUM_ACCEPT_QUEUES; i++) {
+               j = sprintf(def_magf.name, "%d", i);
+               def_magf.name[j] = '\0';
+
+               pentry = rcfs_create_internal(dentry, &def_magf, 0);
+               retval = rcfs_create_coredir(dentry->d_inode, pentry);
+               if (retval)
+                       goto mkdir_err;
+               pentry->d_fsdata = &RCFS_IS_MAGIC;
+               for (j = 0; j < LAQ_MAX_SUBMAGF; j++) {
+                       mfdentry =
+                           rcfs_create_internal(pentry, &sub_magf[j], 0);
+                       mfdentry->d_fsdata = &RCFS_IS_MAGIC;
+                       rcfs_get_inode_info(mfdentry->d_inode)->core =
+                           rcfs_get_inode_info(pentry->d_inode)->core;
+                       rcfs_get_inode_info(mfdentry->d_inode)->mfdentry =
+                                mfdentry;
+                       if (sub_magf[j].i_fop)
+                               mfdentry->d_inode->i_fop = sub_magf[j].i_fop;
+                       if (sub_magf[j].i_op)
+                               mfdentry->d_inode->i_op = sub_magf[j].i_op;
+               }
+               pentry->d_inode->i_op = &sub_iops;
+       }
+       dentry->d_inode->i_op = &class_iops;
+       return 0;
+
+      mkdir_err:
+       /* Needed */
+       dir->i_nlink--;
+       return retval;
+}
+
+char *rcfs_sock_get_name(struct ckrm_core_class *c)
+{
+       char *p = (char *)c->name;
+
+       while (*p)
+               p++;
+       while (*p != '/' && p != c->name)
+               p--;
+
+       return ++p;
+}
+
+
+
+struct inode_operations my_iops = {
+       .create = rcfs_create_noperm,
+       .lookup = simple_lookup,
+       .link = rcfs_link_noperm,
+       .unlink = rcfs_unlink,
+       .symlink = rcfs_symlink_noperm,
+       .mkdir = rcfs_sock_mkdir,
+       .rmdir = rcfs_sock_rmdir,
+       .mknod = rcfs_mknod_noperm,
+       .rename = rcfs_rename,
+};
+
+struct inode_operations class_iops = {
+       .create = rcfs_create_noperm,
+       .lookup = simple_lookup,
+       .link = rcfs_link_noperm,
+       .unlink = rcfs_unlink_noperm,
+       .symlink = rcfs_symlink_noperm,
+       .mkdir = rcfs_mkdir_noperm,
+       .rmdir = rcfs_rmdir_noperm,
+       .mknod = rcfs_mknod_noperm,
+       .rename = rcfs_rename,
+};
+
+struct inode_operations sub_iops = {
+       .create = rcfs_create_noperm,
+       .lookup = simple_lookup,
+       .link = rcfs_link_noperm,
+       .unlink = rcfs_unlink_noperm,
+       .symlink = rcfs_symlink_noperm,
+       .mkdir = rcfs_mkdir_noperm,
+       .rmdir = rcfs_rmdir_noperm,
+       .mknod = rcfs_mknod_noperm,
+       .rename = rcfs_rename,
+};
+
Index: linux-2.6.12-rc1/include/linux/ckrm_net.h
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.12-rc1/include/linux/ckrm_net.h   2005-03-18 15:16:37.392162899 
-0800
@@ -0,0 +1,42 @@
+/* ckrm_rc.h - Header file to be used by Resource controllers of CKRM
+ *
+ * Copyright (C) Vivek Kashyap , IBM Corp. 2004
+ * 
+ * Provides data structures, macros and kernel API of CKRM for 
+ * resource controllers.
+ *
+ * Latest version, more details at http://ckrm.sf.net
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#ifndef _LINUX_CKRM_NET_H
+#define _LINUX_CKRM_NET_H
+
+struct ckrm_sock_class;
+
+struct ckrm_net_struct {
+       int ns_type;            /* type of net class */
+       struct sock *ns_sk;     /* pointer to socket */
+       pid_t ns_tgid;          /* real process id */
+       pid_t ns_pid;           /* calling thread's pid */
+       struct task_struct *ns_tsk;
+       int ns_family;          /* IPPROTO_IPV4 || IPPROTO_IPV6 */
+                               /* Currently only IPV4 is supported */
+       union {
+               __u32 ns_dipv4; /* V4 listener's address */
+       } ns_daddr;
+       __u16 ns_dport;         /* listener's port */
+       __u16 ns_sport;         /* sender's port */
+       atomic_t ns_refcnt;
+       struct ckrm_sock_class *core;
+       struct list_head ckrm_link;
+};
+
+#define ns_daddrv4     ns_daddr.ns_dipv4
+
+#endif
Index: linux-2.6.12-rc1/include/net/sock.h
===================================================================
--- linux-2.6.12-rc1.orig/include/net/sock.h    2005-03-17 17:34:23.000000000 
-0800
+++ linux-2.6.12-rc1/include/net/sock.h 2005-03-18 15:16:37.393162820 -0800
@@ -112,6 +112,8 @@ struct sock_common {
        atomic_t                skc_refcnt;
 };
 
+struct ckrm_net_struct;
+
 /**
   *    struct sock - network layer representation of sockets
   *    @__sk_common - shared layout with tcp_tw_bucket
@@ -236,6 +238,7 @@ struct sock {
        struct timeval          sk_stamp;
        struct socket           *sk_socket;
        void                    *sk_user_data;
+       struct ckrm_net_struct  *sk_ckrm_ns;
        struct module           *sk_owner;
        struct page             *sk_sndmsg_page;
        struct sk_buff          *sk_send_head;
Index: linux-2.6.12-rc1/include/net/tcp.h
===================================================================
--- linux-2.6.12-rc1.orig/include/net/tcp.h     2005-03-17 17:33:53.000000000 
-0800
+++ linux-2.6.12-rc1/include/net/tcp.h  2005-03-18 15:16:37.396162581 -0800
@@ -800,6 +800,7 @@ extern int                  tcp_rcv_established(struct 
 
 extern void                    tcp_rcv_space_adjust(struct sock *sk);
 
+
 enum tcp_ack_state_t
 {
        TCP_ACK_SCHED = 1,
@@ -930,6 +931,9 @@ extern void                 tcp_unhash(struct sock *sk
 
 extern int                     tcp_v4_hash_connecting(struct sock *sk);
 
+extern struct sock *           tcp_v4_lookup_listener(u32 daddr,
+                                                   unsigned short hnum,
+                                                   int dif);
 
 /* From syncookies.c */
 extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, 
Index: linux-2.6.12-rc1/init/Kconfig
===================================================================
--- linux-2.6.12-rc1.orig/init/Kconfig  2005-03-18 15:16:33.376482292 -0800
+++ linux-2.6.12-rc1/init/Kconfig       2005-03-18 15:16:37.397162502 -0800
@@ -174,6 +174,17 @@ config CKRM_TYPE_TASKCLASS
        
          Say Y if unsure 
 
+config CKRM_TYPE_SOCKETCLASS
+       bool "Class Manager for socket groups"
+       depends on CKRM && RCFS_FS
+       default y
+       help
+         SOCKET provides the extensions for CKRM to track per socket
+         classes.  This is the base to enable socket based resource 
+         control for inbound connection control, bandwidth control etc.
+       
+         Say Y if unsure.  
+
 endmenu
 
 config SYSCTL
Index: linux-2.6.12-rc1/kernel/ckrm/ckrm_sockc.c
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.12-rc1/kernel/ckrm/ckrm_sockc.c   2005-03-18 15:16:37.399162343 
-0800
@@ -0,0 +1,559 @@
+/* ckrm_sock.c - Class-based Kernel Resource Management (CKRM)
+ *
+ * Copyright (C) Hubertus Franke, IBM Corp. 2003,2004
+ *           (C) Shailabh Nagar,  IBM Corp. 2003
+ *           (C) Chandra Seetharaman,  IBM Corp. 2003
+ *          (C) Vivek Kashyap, IBM Corp. 2004
+ * 
+ * 
+ * Provides kernel API of CKRM for in-kernel,per-resource controllers 
+ * (one each for cpu, memory, io, network) and callbacks for 
+ * classification modules.
+ *
+ * Latest version, more details at http://ckrm.sf.net
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include <linux/mm.h>
+#include <asm/errno.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/ckrm_rc.h>
+#include <linux/parser.h>
+#include <net/tcp.h>
+
+#include <linux/ckrm_net.h>
+
+struct ckrm_sock_class {
+       struct ckrm_core_class core;
+};
+
+static struct ckrm_sock_class ckrm_sockclass_dflt_class = {
+};
+
+#define SOCKET_CLASS_TYPE_NAME  "socketclass"
+
+const char *dflt_sockclass_name = SOCKET_CLASS_TYPE_NAME;
+
+static struct ckrm_core_class *ckrm_sock_alloc_class(struct ckrm_core_class 
*parent,
+                                               const char *name);
+static int ckrm_sock_free_class(struct ckrm_core_class *core);
+
+static int ckrm_sock_forced_reclassify(struct ckrm_core_class * target,
+                                 const char *resname);
+static int ckrm_sock_show_members(struct ckrm_core_class *core,
+                            struct seq_file *seq);
+static void ckrm_sock_add_resctrl(struct ckrm_core_class *core, int resid);
+static void ckrm_sock_reclassify_class(struct ckrm_sock_class *cls);
+
+struct ckrm_classtype ct_sockclass = {
+       .mfidx = 1,
+       .name = SOCKET_CLASS_TYPE_NAME,
+       .type_id = CKRM_CLASSTYPE_SOCKET_CLASS,
+       .maxdepth = 3,
+       .resid_reserved = 0,
+       .max_res_ctlrs = CKRM_MAX_RES_CTLRS,
+       .max_resid = 0,
+       .bit_res_ctlrs = 0L,
+       .res_ctlrs_lock = SPIN_LOCK_UNLOCKED,
+       .classes = LIST_HEAD_INIT(ct_sockclass.classes),
+
+       .default_class = &ckrm_sockclass_dflt_class.core,
+
+       /* private version of functions */
+       .alloc = &ckrm_sock_alloc_class,
+       .free = &ckrm_sock_free_class,
+       .show_members = &ckrm_sock_show_members,
+       .forced_reclassify = &ckrm_sock_forced_reclassify,
+
+       /* use of default functions */
+       .show_shares = &ckrm_class_show_shares,
+       .show_stats = &ckrm_class_show_stats,
+       .show_config = &ckrm_class_show_config,
+       .set_config = &ckrm_class_set_config,
+       .set_shares = &ckrm_class_set_shares,
+       .reset_stats = &ckrm_class_reset_stats,
+
+       /* Mandatory private version.  No default available */
+       .add_resctrl = &ckrm_sock_add_resctrl,
+};
+
+/* helper functions */
+
+void ckrm_ns_hold(struct ckrm_net_struct *ns)
+{
+       atomic_inc(&ns->ns_refcnt);
+       return;
+}
+
+void ckrm_ns_put(struct ckrm_net_struct *ns)
+{
+       if (atomic_dec_and_test(&ns->ns_refcnt))
+               kfree(ns);
+       return;
+}
+
+/*
+ * Change the class of a netstruct 
+ *
+ * Change the task's task class  to "newcls" if the task's current 
+ * class (task->taskclass) is same as given "oldcls", if it is non-NULL.
+ *
+ */
+
+static void
+ckrm_sock_set_class(struct ckrm_net_struct *ns, struct ckrm_sock_class *newcls,
+              struct ckrm_sock_class *oldcls, enum ckrm_event event)
+{
+       int i;
+       struct ckrm_res_ctlr *rcbs;
+       struct ckrm_classtype *clstype;
+       void *old_res_class, *new_res_class;
+
+       if ((newcls == oldcls) || (newcls == NULL)) {
+               ns->core = (void *)oldcls;
+               return;
+       }
+
+       class_lock(class_core(newcls));
+       ns->core = newcls;
+       list_add(&ns->ckrm_link, &class_core(newcls)->objlist);
+       class_unlock(class_core(newcls));
+
+       clstype = class_isa(newcls);
+       for (i = 0; i < clstype->max_resid; i++) {
+               atomic_inc(&clstype->nr_resusers[i]);
+               old_res_class =
+                   oldcls ? class_core(oldcls)->res_class[i] : NULL;
+               new_res_class =
+                   newcls ? class_core(newcls)->res_class[i] : NULL;
+               rcbs = clstype->res_ctlrs[i];
+               if (rcbs && rcbs->change_resclass
+                   && (old_res_class != new_res_class))
+                       (*rcbs->change_resclass) (ns, old_res_class,
+                                                 new_res_class);
+               atomic_dec(&clstype->nr_resusers[i]);
+       }
+       return;
+}
+
+static void ckrm_sock_add_resctrl(struct ckrm_core_class *core, int resid)
+{
+       struct ckrm_net_struct *ns;
+       struct ckrm_res_ctlr *rcbs;
+
+       if ((resid < 0) || (resid >= CKRM_MAX_RES_CTLRS)
+           || ((rcbs = core->classtype->res_ctlrs[resid]) == NULL))
+               return;
+
+       class_lock(core);
+       list_for_each_entry(ns, &core->objlist, ckrm_link) {
+               if (rcbs->change_resclass)
+                       (*rcbs->change_resclass) (ns, NULL,
+                                                 core->res_class[resid]);
+       }
+       class_unlock(core);
+}
+
+/**************************************************************************
+ *                   Functions called from classification points          *
+ **************************************************************************/
+
+static void cb_sockclass_listen_start(struct sock *sk)
+{
+       struct ckrm_net_struct *ns = NULL;
+       struct ckrm_sock_class *newcls = NULL;
+       struct ckrm_res_ctlr *rcbs;
+       struct ckrm_classtype *clstype;
+       int i = 0;
+
+       /* XXX - TBD ipv6 */
+       if (sk->sk_family == AF_INET6)
+               return;
+
+       /* to store the socket address */
+       ns = (struct ckrm_net_struct *)
+           kmalloc(sizeof(struct ckrm_net_struct), GFP_ATOMIC);
+       if (!ns)
+               return;
+
+       memset(ns, 0, sizeof(*ns));
+       INIT_LIST_HEAD(&ns->ckrm_link);
+       ckrm_ns_hold(ns);
+
+       ns->ns_family = sk->sk_family;
+       if (ns->ns_family == AF_INET6)  // IPv6 not supported yet.
+               return;
+
+       ns->ns_daddrv4 = inet_sk(sk)->rcv_saddr;
+       ns->ns_dport = inet_sk(sk)->num;
+
+       ns->ns_pid = current->pid;
+       ns->ns_tgid = current->tgid;
+       ns->ns_tsk = current;
+       ce_protect(&ct_sockclass);
+       CE_CLASSIFY_RET(newcls, &ct_sockclass, CKRM_EVENT_LISTEN_START, ns,
+                       current);
+       ce_release(&ct_sockclass);
+
+       if (newcls == NULL) {
+               newcls = &ckrm_sockclass_dflt_class;
+               ckrm_core_grab(class_core(newcls));
+       }
+
+       class_lock(class_core(newcls));
+       list_add(&ns->ckrm_link, &class_core(newcls)->objlist);
+       ns->core = newcls;
+       class_unlock(class_core(newcls));
+
+       /*
+        * the socket is already locked
+        * take a reference on socket on our behalf
+        */
+       sock_hold(sk);
+       sk->sk_ckrm_ns = (void *)ns;
+       ns->ns_sk = sk;
+
+       /* modify its shares */
+       clstype = class_isa(newcls);
+       for (i = 0; i < clstype->max_resid; i++) {
+               atomic_inc(&clstype->nr_resusers[i]);
+               rcbs = clstype->res_ctlrs[i];
+               if (rcbs && rcbs->change_resclass) {
+                       (*rcbs->change_resclass) ((void *)ns,
+                                                 NULL,
+                                                 class_core(newcls)->
+                                                 res_class[i]);
+               }
+               atomic_dec(&clstype->nr_resusers[i]);
+       }
+       return;
+}
+
+static void cb_sockclass_listen_stop(struct sock *sk)
+{
+       struct ckrm_net_struct *ns = NULL;
+       struct ckrm_sock_class *newcls = NULL;
+
+       /* XXX - TBD ipv6 */
+       if (sk->sk_family == AF_INET6)
+               return;
+
+       ns = (struct ckrm_net_struct *)sk->sk_ckrm_ns;
+       if (!ns)     /* listen_start called before socket_aq was loaded */
+               return;
+
+       newcls = ns->core;
+       if (newcls) {
+               class_lock(class_core(newcls));
+               list_del(&ns->ckrm_link);
+               INIT_LIST_HEAD(&ns->ckrm_link);
+               class_unlock(class_core(newcls));
+               ckrm_core_drop(class_core(newcls));
+       }
+       /* the socket is already locked */
+       sk->sk_ckrm_ns = NULL;
+       sock_put(sk);
+
+       // Should be the last count and free it
+       ckrm_ns_put(ns);
+       return;
+}
+
+static struct ckrm_event_spec ckrm_sock_events_callbacks[] = {
+       {CKRM_EVENT_LISTEN_START, {cb_sockclass_listen_start, NULL}},
+       {CKRM_EVENT_LISTEN_STOP, {cb_sockclass_listen_stop, NULL}},
+       {-1, {NULL, NULL}}
+};
+
+/**************************************************************************
+ *                  Class Object Creation / Destruction
+ **************************************************************************/
+
+static struct ckrm_core_class *ckrm_sock_alloc_class(struct ckrm_core_class 
*parent,
+                                               const char *name)
+{
+       struct ckrm_sock_class *sockcls;
+       sockcls = kmalloc(sizeof(struct ckrm_sock_class), GFP_KERNEL);
+       if (sockcls == NULL)
+               return NULL;
+       memset(sockcls, 0, sizeof(struct ckrm_sock_class));
+
+       ckrm_init_core_class(&ct_sockclass, class_core(sockcls), parent, name);
+
+       ce_protect(&ct_sockclass);
+       if (ct_sockclass.ce_cb_active && ct_sockclass.ce_callbacks.class_add)
+               (*ct_sockclass.ce_callbacks.class_add) (name, sockcls,
+                                                       ct_sockclass.type_id);
+       ce_release(&ct_sockclass);
+
+       return class_core(sockcls);
+}
+
+static int ckrm_sock_free_class(struct ckrm_core_class *core)
+{
+       struct ckrm_sock_class *sockcls;
+
+       if (!ckrm_is_core_valid(core)) {
+               /* Invalid core */
+               return (-EINVAL);
+       }
+       if (core == core->classtype->default_class) {
+               /* reset the name tag */
+               core->name = dflt_sockclass_name;
+               return 0;
+       }
+
+       sockcls = class_type(struct ckrm_sock_class, core);
+
+       ce_protect(&ct_sockclass);
+
+       if (ct_sockclass.ce_cb_active && ct_sockclass.ce_callbacks.class_delete)
+               (*ct_sockclass.ce_callbacks.class_delete) (core->name, sockcls,
+                                                          
ct_sockclass.type_id);
+
+       ckrm_sock_reclassify_class(sockcls);
+
+       ce_release(&ct_sockclass);
+
+       ckrm_release_core_class(core);  
+       /* Could just drop the class?  Error message? */
+
+       return 0;
+}
+
+static int ckrm_sock_show_members(struct ckrm_core_class *core, struct 
seq_file *seq)
+{
+       struct list_head *lh;
+       struct ckrm_net_struct *ns = NULL;
+
+       class_lock(core);
+       list_for_each(lh, &core->objlist) {
+               ns = container_of(lh, struct ckrm_net_struct, ckrm_link);
+               seq_printf(seq, "%d.%d.%d.%d\\%d\n",
+                          NIPQUAD(ns->ns_daddrv4), ns->ns_dport);
+       }
+       class_unlock(core);
+
+       return 0;
+}
+
+static int
+ckrm_sock_forced_reclassify_ns(struct ckrm_net_struct *tns,
+                         struct ckrm_core_class *core)
+{
+       struct ckrm_net_struct *ns = NULL;
+       struct sock *sk = NULL;
+       struct ckrm_sock_class *oldcls, *newcls;
+       int rc = -EINVAL;
+
+       if (!ckrm_is_core_valid(core)) {
+               return rc;
+       }
+
+       newcls = class_type(struct ckrm_sock_class, core);
+       /*
+        * lookup the listening sockets
+        * returns with a reference count set on socket
+        */
+       if (tns->ns_family == AF_INET6)
+               return -EOPNOTSUPP;
+
+       sk = tcp_v4_lookup_listener(tns->ns_daddrv4, tns->ns_dport, 0);
+       if (!sk) {
+               printk(KERN_INFO "No such listener 0x%x:%d\n",
+                      tns->ns_daddrv4, tns->ns_dport);
+               return rc;
+       }
+       lock_sock(sk);
+       if (!sk->sk_ckrm_ns) {
+               goto out;
+       }
+       ns = sk->sk_ckrm_ns;
+       ckrm_ns_hold(ns);
+       if (!capable(CAP_NET_ADMIN) && (ns->ns_tsk->user != current->user)) {
+               ckrm_ns_put(ns);
+               rc = -EPERM;
+               goto out;
+       }
+
+       oldcls = ns->core;
+       if ((oldcls == NULL) || (oldcls == newcls)) {
+               ckrm_ns_put(ns);
+               goto out;
+       }
+       /* remove the net_struct from the current class */
+       class_lock(class_core(oldcls));
+       list_del(&ns->ckrm_link);
+       INIT_LIST_HEAD(&ns->ckrm_link);
+       ns->core = NULL;
+       class_unlock(class_core(oldcls));
+
+       ckrm_sock_set_class(ns, newcls, oldcls, CKRM_EVENT_MANUAL);
+       ckrm_ns_put(ns);
+       rc = 0;
+      out:
+       release_sock(sk);
+       sock_put(sk);
+
+       return rc;
+
+}
+
+enum ckrm_sock_target_token {
+       IPV4, IPV6, SOCKC_TARGET_ERR
+};
+
+static match_table_t ckrm_sock_target_tokens = {
+       {IPV4, "ipv4=%s"},
+       {IPV6, "ipv6=%s"},
+       {SOCKC_TARGET_ERR, NULL},
+};
+
+char *v4toi(char *s, char c, __u32 * v)
+{
+       unsigned int k = 0, n = 0;
+
+       while (*s && (*s != c)) {
+               if (*s == '.') {
+                       n <<= 8;
+                       n |= k;
+                       k = 0;
+               } else
+                       k = k * 10 + *s - '0';
+               s++;
+       }
+
+       n <<= 8;
+       *v = n | k;
+
+       return s;
+}
+
+static int
+ckrm_sock_forced_reclassify(struct ckrm_core_class *target, const char 
*options)
+{
+       char *p, *p2;
+       struct ckrm_net_struct ns;
+       __u32 v4addr, tmp;
+
+       if (!options)
+               return -EINVAL;
+
+       if (target == NULL) {
+               unsigned long id = simple_strtol(options,NULL,0);
+               if (!capable(CAP_NET_ADMIN))
+                       return -EPERM;
+               if (id != 0) 
+                       return -EINVAL;
+               printk("ckrm_sock_class: reclassify all not net implemented\n");
+               return 0;
+       }
+
+       while ((p = strsep((char **)&options, ",")) != NULL) {
+               substring_t args[MAX_OPT_ARGS];
+               int token;
+
+               if (!*p)
+                       continue;
+               token = match_token(p, ckrm_sock_target_tokens, args);
+               switch (token) {
+
+               case IPV4:
+
+                       p2 = p;
+                       while (*p2 && (*p2 != '='))
+                               ++p2;
+                       p2++;
+                       p2 = v4toi(p2, '\\', &(v4addr));
+                       ns.ns_daddrv4 = htonl(v4addr);
+                       ns.ns_family = AF_INET;
+                       p2 = v4toi(++p2, ':', &tmp);
+                       ns.ns_dport = (__u16) tmp;
+                       if (*p2)
+                               p2 = v4toi(++p2, '\0', &ns.ns_pid);
+                       ckrm_sock_forced_reclassify_ns(&ns, target);
+                       break;
+
+               case IPV6:
+                       printk(KERN_INFO "rcfs: IPV6 not supported yet\n");
+                       return -ENOSYS;
+               default:
+                       return -EINVAL;
+               }
+       }
+       return -EINVAL;
+}
+
+/*
+ * Listen_aq reclassification.
+ */
+static void ckrm_sock_reclassify_class(struct ckrm_sock_class *cls)
+{
+       struct ckrm_net_struct *ns, *tns;
+       struct ckrm_core_class *core = class_core(cls);
+       LIST_HEAD(local_list);
+
+       if (!cls)
+               return;
+
+       if (!ckrm_validate_and_grab_core(core))
+               return;
+
+       class_lock(core);
+       /* we have the core refcnt */
+       if (list_empty(&core->objlist)) {
+               class_unlock(core);
+               ckrm_core_drop(core);
+               return;
+       }
+
+       INIT_LIST_HEAD(&local_list);
+       list_splice_init(&core->objlist, &local_list);
+       class_unlock(core);
+       ckrm_core_drop(core);
+
+       list_for_each_entry_safe(ns, tns, &local_list, ckrm_link) {
+               ckrm_ns_hold(ns);
+               list_del(&ns->ckrm_link);
+               if (ns->ns_sk) {
+                       lock_sock(ns->ns_sk);
+                       ckrm_sock_set_class(ns, &ckrm_sockclass_dflt_class, 
NULL,
+                                      CKRM_EVENT_MANUAL);
+                       release_sock(ns->ns_sk);
+               }
+               ckrm_ns_put(ns);
+       }
+       return;
+}
+
+void __init ckrm_meta_init_sockclass(void)
+{
+       printk("...... Initializing ClassType<%s> ........\n",
+              ct_sockclass.name);
+       /* intialize the default class */
+       ckrm_init_core_class(&ct_sockclass, 
class_core(&ckrm_sockclass_dflt_class),
+                            NULL, dflt_sockclass_name);
+
+       /* register classtype and initialize default task class */
+       ckrm_register_classtype(&ct_sockclass);
+       ckrm_register_event_set(ckrm_sock_events_callbacks);
+
+       /*
+        * note registeration of all resource controllers will be done
+        * later dynamically as these are specified as modules
+        */
+}
Index: linux-2.6.12-rc1/kernel/ckrm/Makefile
===================================================================
--- linux-2.6.12-rc1.orig/kernel/ckrm/Makefile  2005-03-18 15:16:33.379482053 
-0800
+++ linux-2.6.12-rc1/kernel/ckrm/Makefile       2005-03-18 15:16:37.399162343 
-0800
@@ -4,3 +4,4 @@
 
 obj-y += ckrm_events.o ckrm.o ckrmutils.o
 obj-$(CONFIG_CKRM_TYPE_TASKCLASS) += ckrm_tc.o
+obj-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += ckrm_sockc.o
Index: linux-2.6.12-rc1/net/ipv4/tcp_ipv4.c
===================================================================
--- linux-2.6.12-rc1.orig/net/ipv4/tcp_ipv4.c   2005-03-17 17:34:08.000000000 
-0800
+++ linux-2.6.12-rc1/net/ipv4/tcp_ipv4.c        2005-03-18 15:16:37.401162184 
-0800
@@ -448,7 +448,8 @@ static struct sock *__tcp_v4_lookup_list
 }
 
 /* Optimize the common listener case. */
-static inline struct sock *tcp_v4_lookup_listener(u32 daddr,
+/* XXX:  Was inline - need to use for CKRM, fix before next release */
+struct sock *tcp_v4_lookup_listener(u32 daddr,
                unsigned short hnum, int dif)
 {
        struct sock *sk = NULL;
@@ -2645,6 +2646,7 @@ EXPORT_SYMBOL(tcp_prot);
 EXPORT_SYMBOL(tcp_put_port);
 EXPORT_SYMBOL(tcp_unhash);
 EXPORT_SYMBOL(tcp_v4_conn_request);
+EXPORT_SYMBOL(tcp_v4_lookup_listener);
 EXPORT_SYMBOL(tcp_v4_connect);
 EXPORT_SYMBOL(tcp_v4_do_rcv);
 EXPORT_SYMBOL(tcp_v4_rebuild_header);

--
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to