Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]

2006-07-07 Thread Paul Jackson
Andrew wrote:
 Your email client performs space-stuffing.

Shailabh,

Some of us find that it is easier and more reliable to use a special
purpose script to send patches, rather than trying to do so via our
email client.  Even though my email client, Sylpheed, probably sends
patches just fine, I enjoy the convenience of preparing the patches
and mailing instructions in my editor, before sending them off with
such a utility.

One possible such utility is 'sendpatchset', which I maintain:

  http://www.speakeasy.org/~pj99/sgi/sendpatchset

It's a script, with the documentation embedded as the help message.

Especially when sending off multiple patches as a set, it provides
more reliable results than trying to prepare and send multiple such
simultaneous messages from the typical email client.

-- 
  I won't rest till it's the best ...
  Programmer, Linux Scalability
  Paul Jackson [EMAIL PROTECTED] 1.925.600.0401
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]

2006-07-07 Thread Thomas Graf
* Andrew Morton [EMAIL PROTECTED] 2006-07-06 16:05
 hm, nla_strlcpy() looks more complex than it needs to be.  We really need
 nla_kstrndup() ;)
 
 Oh well.  This?

Looks good.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]

2006-07-07 Thread Thomas Graf
* Andrew Morton [EMAIL PROTECTED] 2006-07-06 15:56
 Yup.  Thomas, what's the testing status of the netlink patch you sent?  
 Should I
 queue it up and start plagueing people with it?

It survived feeding it with oversized strings etc. Feel free
to queue it up.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]

2006-07-06 Thread Andrew Morton
On Thu, 06 Jul 2006 05:28:35 -0400
Shailabh Nagar [EMAIL PROTECTED] wrote:

 On systems with a large number of cpus, with even a modest rate of
 tasks exiting per cpu, the volume of taskstats data sent on thread exit
 can overflow a userspace listener's buffers.
 
 One approach to avoiding overflow is to allow listeners to get data for
 a limited and specific set of cpus. By scaling the number of listeners
 and/or the cpus they monitor, userspace can handle the statistical data
 overload more gracefully.
 
 In this patch, each listener registers to listen to a specific set of
 cpus by specifying a cpumask.  The interest is recorded per-cpu. When
 a task exits on a cpu, its taskstats data is unicast to each listener
 interested in that cpu.
 
 Thanks to Andrew Morton for pointing out the various scalability and
 general concerns of previous attempts and for suggesting this design.
 
 ...

 --- linux-2.6.17-mm3equiv.orig/include/linux/taskstats.h  2006-06-30 
 19:03:40.0 -0400
 +++ linux-2.6.17-mm3equiv/include/linux/taskstats.h   2006-07-06 
 02:38:28.0 -0400

Your email client performs space-stuffing.  Fortunately sed -e 's/^  / /'
is easy.

   #include linux/taskstats_kern.h
   #include linux/delayacct.h
 +#include linux/cpumask.h
 +#include linux/percpu.h
   #include net/genetlink.h
   #include asm/atomic.h

Like that.

 
 +static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
 +{
 + struct listener *s;
 + struct listener_list *listeners;
 + unsigned int cpu;
 + cpumask_t mask;
 + struct list_head *p;
 +
 + memcpy(mask, maskp, sizeof(cpumask_t));

mask = *maskp; ?

 + if (!cpus_subset(mask, cpu_possible_map))
 + return -EINVAL;
 +
 + if (isadd == REGISTER) {
 + for_each_cpu_mask(cpu, mask) {
 + s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
 +  cpu_to_node(cpu));
 + if (!s)
 + return -ENOMEM;
 + s-pid = pid;
 + INIT_LIST_HEAD(s-list);
 +
 + listeners = per_cpu(listener_array, cpu);
 + down_write(listeners-sem);
 + list_add(s-list, listeners-list);
 + up_write(listeners-sem);
 + }
 + } else {
 + for_each_cpu_mask(cpu, mask) {
 + struct list_head *tmp;
 +
 + listeners = per_cpu(listener_array, cpu);
 + down_write(listeners-sem);
 + list_for_each_safe(p, tmp, listeners-list) {
 + s = list_entry(p, struct listener, list);
 + if (s-pid == pid) {
 + list_del(s-list);
 + kfree(s);
 + break;
 + }
 + }
 + up_write(listeners-sem);
 + }
 + }
 + return 0;
 +}

You might choose to handle the ENOMEM situation here by backing out and not
leaving things half-installed.  I suspect that's just a simple `goto'.

 -static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info)
 +static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
   {
   int rc = 0;
   struct sk_buff *rep_skb;
 @@ -210,6 +302,25 @@ static int taskstats_send_stats(struct s
   void *reply;
   size_t size;
   struct nlattr *na;
 + cpumask_t mask;

When counting add_del_listener(), that's two cpumasks on the stack.  How
big can these get?  256 bytes?  Is it possible to get by with just the one?

 +
 + if (info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) {
 + na = info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
 + if (nla_len(na)  TASKSTATS_CPUMASK_MAXLEN)
 + return -E2BIG;
 + cpulist_parse((char *)nla_data(na), mask);

Best check the return value from this function.

 + rc = add_del_listener(info-snd_pid, mask, REGISTER);
 + return rc;
 + }
 +
 + if (info-attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) {
 + na = info-attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK];
 + if (nla_len(na)  TASKSTATS_CPUMASK_MAXLEN)
 + return -E2BIG;
 + cpulist_parse((char *)nla_data(na), mask);
 + rc = add_del_listener(info-snd_pid, mask, DEREGISTER);
 + return rc;
 + }
 
 ...
 
 +void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu)
 +{
 + struct listener_list *listeners;
 + struct taskstats *tmp;
 + /*
 +  * This is the cpu on which the task is exiting currently and will
 +  * be the one for which the exit event is sent, even if the cpu
 +  * on which this function is running changes later.
 +  */
 + *mycpu = 

Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]

2006-07-06 Thread Shailabh Nagar
On Thu, 2006-07-06 at 02:56 -0700, Andrew Morton wrote:
 On Thu, 06 Jul 2006 05:28:35 -0400
 Shailabh Nagar [EMAIL PROTECTED] wrote:
 
  On systems with a large number of cpus, with even a modest rate of
  tasks exiting per cpu, the volume of taskstats data sent on thread exit
  can overflow a userspace listener's buffers.
  
  One approach to avoiding overflow is to allow listeners to get data for
  a limited and specific set of cpus. By scaling the number of listeners
  and/or the cpus they monitor, userspace can handle the statistical data
  overload more gracefully.
  
  In this patch, each listener registers to listen to a specific set of
  cpus by specifying a cpumask.  The interest is recorded per-cpu. When
  a task exits on a cpu, its taskstats data is unicast to each listener
  interested in that cpu.
  
  Thanks to Andrew Morton for pointing out the various scalability and
  general concerns of previous attempts and for suggesting this design.
  
  ...
 
  --- linux-2.6.17-mm3equiv.orig/include/linux/taskstats.h2006-06-30 
  19:03:40.0 -0400
  +++ linux-2.6.17-mm3equiv/include/linux/taskstats.h 2006-07-06 
  02:38:28.0 -0400
 
 Your email client performs space-stuffing.  Fortunately sed -e 's/^  / /'
 is easy.
 
#include linux/taskstats_kern.h
#include linux/delayacct.h
  +#include linux/cpumask.h
  +#include linux/percpu.h
#include net/genetlink.h
#include asm/atomic.h
 
 Like that.

Sorry. First the text breaking and now patch manglingI'll switch
mail clients.
 
  
  +static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
  +{
  +   struct listener *s;
  +   struct listener_list *listeners;
  +   unsigned int cpu;
  +   cpumask_t mask;
  +   struct list_head *p;
  +
  +   memcpy(mask, maskp, sizeof(cpumask_t));
 
   mask = *maskp; ?

Yes.

 
  +   if (!cpus_subset(mask, cpu_possible_map))
  +   return -EINVAL;
  +
  +   if (isadd == REGISTER) {
  +   for_each_cpu_mask(cpu, mask) {
  +   s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
  +cpu_to_node(cpu));
  +   if (!s)
  +   return -ENOMEM;
  +   s-pid = pid;
  +   INIT_LIST_HEAD(s-list);
  +
  +   listeners = per_cpu(listener_array, cpu);
  +   down_write(listeners-sem);
  +   list_add(s-list, listeners-list);
  +   up_write(listeners-sem);
  +   }
  +   } else {
  +   for_each_cpu_mask(cpu, mask) {
  +   struct list_head *tmp;
  +
  +   listeners = per_cpu(listener_array, cpu);
  +   down_write(listeners-sem);
  +   list_for_each_safe(p, tmp, listeners-list) {
  +   s = list_entry(p, struct listener, list);
  +   if (s-pid == pid) {
  +   list_del(s-list);
  +   kfree(s);
  +   break;
  +   }
  +   }
  +   up_write(listeners-sem);
  +   }
  +   }
  +   return 0;
  +}
 
 You might choose to handle the ENOMEM situation here by backing out and not
 leaving things half-installed.  I suspect that's just a simple `goto'.

Will do. 

 
  -static int taskstats_send_stats(struct sk_buff *skb, struct genl_info 
  *info)
  +static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
{
  int rc = 0;
  struct sk_buff *rep_skb;
  @@ -210,6 +302,25 @@ static int taskstats_send_stats(struct s
  void *reply;
  size_t size;
  struct nlattr *na;
  +   cpumask_t mask;
 
 When counting add_del_listener(), that's two cpumasks on the stack.  How
 big can these get?  256 bytes?  Is it possible to get by with just the one?

For 1024 cpus, 128 bytes. All the cpumask functions seem to need the
mask (though they internally use the address). Will try to see if I can
reduce.
 
  +
  +   if (info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) {
  +   na = info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
  +   if (nla_len(na)  TASKSTATS_CPUMASK_MAXLEN)
  +   return -E2BIG;
  +   cpulist_parse((char *)nla_data(na), mask);
 
 Best check the return value from this function.

Oops.

 
  +   rc = add_del_listener(info-snd_pid, mask, REGISTER);
  +   return rc;
  +   }
  +
  +   if (info-attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) {
  +   na = info-attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK];
  +   if (nla_len(na)  TASKSTATS_CPUMASK_MAXLEN)
  +   return -E2BIG;
  +   cpulist_parse((char *)nla_data(na), mask);
  +   rc = add_del_listener(info-snd_pid, mask, DEREGISTER);
  +   return rc;
  +   }
  
  ...
  
  +void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int 
  *mycpu)

Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]

2006-07-06 Thread Shailabh Nagar
On systems with a large number of cpus, with even a modest rate of 
tasks exiting per cpu, the volume of taskstats data sent on thread exit
can overflow a userspace listener's buffers.

One approach to avoiding overflow is to allow listeners to get data for
a limited and specific set of cpus. By scaling the number of listeners 
and/or the cpus they monitor, userspace can handle the statistical data
overload more gracefully.

In this patch, each listener registers to listen to a specific set of 
cpus by specifying a cpumask.  The interest is recorded per-cpu. When 
a task exits on a cpu, its taskstats data is unicast to each listener 
interested in that cpu.

Thanks to Andrew Morton for pointing out the various scalability and 
general concerns of previous attempts and for suggesting this design.

Signed-Off-By: Shailabh Nagar [EMAIL PROTECTED]
---

Addresses various comments by akpm:
- fix your email client !
- avoid memcpy of cpumask
- handle ENOMEM within add_del_listener properly
- check return value of cpulist_parse
- exploit kfree(NULL)

Does not address (can't see an easy way to do)
- try to avoid allocating two cpumask's on stack from taskstats_user_cmd 

Tested on a 1-way and works ok. Testing on 2/4 way being done.


 include/linux/taskstats.h  |4 
 include/linux/taskstats_kern.h |   22 -
 kernel/exit.c  |5 -
 kernel/taskstats.c |  168 ++---
 4 files changed, 168 insertions(+), 31 deletions(-)

Index: linux-2.6.17-mm3equiv/include/linux/taskstats.h
===
--- linux-2.6.17-mm3equiv.orig/include/linux/taskstats.h2006-06-30 
19:03:40.0 -0400
+++ linux-2.6.17-mm3equiv/include/linux/taskstats.h 2006-07-06 
06:48:10.0 -0400
@@ -87,8 +87,6 @@ struct taskstats {
 };
 
 
-#define TASKSTATS_LISTEN_GROUP 0x1
-
 /*
  * Commands sent from userspace
  * Not versioned. New commands should only be inserted at the enum's end
@@ -120,6 +118,8 @@ enum {
TASKSTATS_CMD_ATTR_UNSPEC = 0,
TASKSTATS_CMD_ATTR_PID,
TASKSTATS_CMD_ATTR_TGID,
+   TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
+   TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
__TASKSTATS_CMD_ATTR_MAX,
 };
 
Index: linux-2.6.17-mm3equiv/include/linux/taskstats_kern.h
===
--- linux-2.6.17-mm3equiv.orig/include/linux/taskstats_kern.h   2006-06-30 
11:57:14.0 -0400
+++ linux-2.6.17-mm3equiv/include/linux/taskstats_kern.h2006-07-06 
06:48:10.0 -0400
@@ -20,21 +20,6 @@ enum {
 extern kmem_cache_t *taskstats_cache;
 extern struct mutex taskstats_exit_mutex;
 
-static inline int taskstats_has_listeners(void)
-{
-   if (!genl_sock)
-   return 0;
-   return netlink_has_listeners(genl_sock, TASKSTATS_LISTEN_GROUP);
-}
-
-
-static inline void taskstats_exit_alloc(struct taskstats **ptidstats)
-{
-   *ptidstats = NULL;
-   if (taskstats_has_listeners())
-   *ptidstats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL);
-}
-
 static inline void taskstats_exit_free(struct taskstats *tidstats)
 {
if (tidstats)
@@ -79,17 +64,18 @@ static inline void taskstats_tgid_free(s
kmem_cache_free(taskstats_cache, stats);
 }
 
-extern void taskstats_exit_send(struct task_struct *, struct taskstats *, int);
+extern void taskstats_exit_alloc(struct taskstats **, unsigned int *);
+extern void taskstats_exit_send(struct task_struct *, struct taskstats *, int, 
unsigned int);
 extern void taskstats_init_early(void);
 extern void taskstats_tgid_alloc(struct signal_struct *);
 #else
-static inline void taskstats_exit_alloc(struct taskstats **ptidstats)
+static inline void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned 
int *mycpu)
 {}
 static inline void taskstats_exit_free(struct taskstats *ptidstats)
 {}
 static inline void taskstats_exit_send(struct task_struct *tsk,
   struct taskstats *tidstats,
-  int group_dead)
+  int group_dead, unsigned int cpu)
 {}
 static inline void taskstats_tgid_init(struct signal_struct *sig)
 {}
Index: linux-2.6.17-mm3equiv/kernel/taskstats.c
===
--- linux-2.6.17-mm3equiv.orig/kernel/taskstats.c   2006-06-30 
23:38:39.0 -0400
+++ linux-2.6.17-mm3equiv/kernel/taskstats.c2006-07-06 07:02:54.0 
-0400
@@ -19,9 +19,17 @@
 #include linux/kernel.h
 #include linux/taskstats_kern.h
 #include linux/delayacct.h
+#include linux/cpumask.h
+#include linux/percpu.h
 #include net/genetlink.h
 #include asm/atomic.h
 
+/*
+ * Maximum length of a cpumask that can be specified in
+ * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute
+ */
+#define TASKSTATS_CPUMASK_MAXLEN   (100+6*NR_CPUS)
+
 static DEFINE_PER_CPU(__u32, 

Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]

2006-07-06 Thread Thomas Graf
* Shailabh Nagar [EMAIL PROTECTED] 2006-07-06 07:37
 @@ -37,9 +45,26 @@ static struct nla_policy taskstats_cmd_g
  __read_mostly = {
   [TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
   [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
 + [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
 + [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};

 + na = info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
 + if (nla_len(na)  TASKSTATS_CPUMASK_MAXLEN)
 + return -E2BIG;
 + rc = cpulist_parse((char *)nla_data(na), mask);

This isn't safe, the data in the attribute is not guaranteed to be
NUL terminated. Still it's probably me to blame for not making
this more obvious in the API.

I've attached a patch below extending the API to make it easier
for interfaces using NUL termianted strings, so you'd do:

[TASKSTATS_CMS_ATTR_REGISTER_CPUMASK] = { .type = NLA_NUL_STRING,
  .len = TASKSTATS_CPUMASK_MAXLEN }

... and then use (char *) nla_data(str_attr) without any further
sanity checks.

[NETLINK]: Improve string attribute validation

Introduces a new attribute type NLA_NUL_STRING to support NUL
terminated strings. Attributes of this kind require to carry
a terminating NUL within the maximum specified in the policy.

The `old' NLA_STRING which is not required to be NUL terminated
is extended to provide means to specify a maximum length of the
string.

Aims at easing the pain with using nla_strlcpy() on temporary
buffers.

The old `minlen' field is renamed to `len' for cosmetic purposes
which is ok since nobody was using it at this point.

Signed-off-by: Thomas Graf [EMAIL PROTECTED]

Index: net-2.6.git/include/net/netlink.h
===
--- net-2.6.git.orig/include/net/netlink.h
+++ net-2.6.git/include/net/netlink.h
@@ -158,6 +158,7 @@ enum {
NLA_FLAG,
NLA_MSECS,
NLA_NESTED,
+   NLA_NUL_STRING,
__NLA_TYPE_MAX,
 };
 
@@ -166,21 +167,27 @@ enum {
 /**
  * struct nla_policy - attribute validation policy
  * @type: Type of attribute or NLA_UNSPEC
- * @minlen: Minimal length of payload required to be available
+ * @len: Type specific length of payload
  *
  * Policies are defined as arrays of this struct, the array must be
  * accessible by attribute type up to the highest identifier to be expected.
  *
+ * Meaning of `len' field:
+ *NLA_STRING   Maximum length of string
+ *NLA_NUL_STRING   Maximum length of string including NUL
+ *NLA_FLAG Unused
+ *All otherExact length of attribute payload
+ *
  * Example:
  * static struct nla_policy my_policy[ATTR_MAX+1] __read_mostly = {
  * [ATTR_FOO] = { .type = NLA_U16 },
- * [ATTR_BAR] = { .type = NLA_STRING },
- * [ATTR_BAZ] = { .minlen = sizeof(struct mystruct) },
+ * [ATTR_BAR] = { .type = NLA_STRING, len = BARSIZ },
+ * [ATTR_BAZ] = { .len = sizeof(struct mystruct) },
  * };
  */
 struct nla_policy {
u16 type;
-   u16 minlen;
+   u16 len;
 };
 
 extern voidnetlink_run_queue(struct sock *sk, unsigned int *qlen,
Index: net-2.6.git/net/netlink/attr.c
===
--- net-2.6.git.orig/net/netlink/attr.c
+++ net-2.6.git/net/netlink/attr.c
@@ -20,7 +20,6 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+
[NLA_U16]   = sizeof(u16),
[NLA_U32]   = sizeof(u32),
[NLA_U64]   = sizeof(u64),
-   [NLA_STRING]= 1,
[NLA_NESTED]= NLA_HDRLEN,
 };
 
@@ -28,7 +27,7 @@ static int validate_nla(struct nlattr *n
struct nla_policy *policy)
 {
struct nla_policy *pt;
-   int minlen = 0;
+   int minlen = 0, attrlen = nla_len(nla);
 
if (nla-nla_type = 0 || nla-nla_type  maxtype)
return 0;
@@ -37,16 +36,33 @@ static int validate_nla(struct nlattr *n
 
BUG_ON(pt-type  NLA_TYPE_MAX);
 
-   if (pt-minlen)
-   minlen = pt-minlen;
-   else if (pt-type != NLA_UNSPEC)
-   minlen = nla_attr_minlen[pt-type];
+   switch (pt-type) {
+   case NLA_FLAG:
+   if (attrlen  0)
+   return -ERANGE;
+   break;
+
+   case NLA_NUL_STRING:
+   minlen = min_t(int, attrlen, pt-len);
+
+   if (!minlen || strnchr(nla_data(nla), minlen, '\0') == NULL)
+   return -EINVAL;
+   /* fall through */
+
+   case NLA_STRING:
+   if (attrlen  1 || attrlen  pt-len)
+   return -ERANGE;
+   break;
+
+   default:
+   if (pt-len)
+   minlen = pt-len;
+   else if (pt-type != NLA_UNSPEC)
+   minlen = nla_attr_minlen[pt-type];
 
-   if (pt-type == NLA_FLAG  

Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]

2006-07-06 Thread Shailabh Nagar
On Thu, 2006-07-06 at 14:08 +0200, Thomas Graf wrote:
 * Shailabh Nagar [EMAIL PROTECTED] 2006-07-06 07:37
  @@ -37,9 +45,26 @@ static struct nla_policy taskstats_cmd_g
   __read_mostly = {
  [TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
  [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
  +   [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
  +   [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
 
  +   na = info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
  +   if (nla_len(na)  TASKSTATS_CPUMASK_MAXLEN)
  +   return -E2BIG;
  +   rc = cpulist_parse((char *)nla_data(na), mask);
 
 This isn't safe, the data in the attribute is not guaranteed to be
 NUL terminated. Still it's probably me to blame for not making
 this more obvious in the API.
 
 I've attached a patch below extending the API to make it easier
 for interfaces using NUL termianted strings, so you'd do:
 
 [TASKSTATS_CMS_ATTR_REGISTER_CPUMASK] = { .type = NLA_NUL_STRING,
   .len = TASKSTATS_CPUMASK_MAXLEN }
 
 ... and then use (char *) nla_data(str_attr) without any further
 sanity checks.


Thanks. That makes it much clearer. I was looking around for a maxlen
attribute helper yesterday :-)

--Shailabh


 
 [NETLINK]: Improve string attribute validation
 
 Introduces a new attribute type NLA_NUL_STRING to support NUL
 terminated strings. Attributes of this kind require to carry
 a terminating NUL within the maximum specified in the policy.
 
 The `old' NLA_STRING which is not required to be NUL terminated
 is extended to provide means to specify a maximum length of the
 string.
 
 Aims at easing the pain with using nla_strlcpy() on temporary
 buffers.
 
 The old `minlen' field is renamed to `len' for cosmetic purposes
 which is ok since nobody was using it at this point.
 
 Signed-off-by: Thomas Graf [EMAIL PROTECTED]

snip

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]

2006-07-06 Thread Andrew Morton
Thomas Graf [EMAIL PROTECTED] wrote:

 * Shailabh Nagar [EMAIL PROTECTED] 2006-07-06 07:37
  @@ -37,9 +45,26 @@ static struct nla_policy taskstats_cmd_g
   __read_mostly = {
  [TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
  [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
  +   [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
  +   [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
 
  +   na = info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
  +   if (nla_len(na)  TASKSTATS_CPUMASK_MAXLEN)
  +   return -E2BIG;
  +   rc = cpulist_parse((char *)nla_data(na), mask);
 
 This isn't safe, the data in the attribute is not guaranteed to be
 NUL terminated. Still it's probably me to blame for not making
 this more obvious in the API.
 

Thanks, that was an unpleasant bug.

 I've attached a patch below extending the API to make it easier
 for interfaces using NUL termianted strings,

In the interests of keeping this work decoupled from netlink enhancements
I'd propose the below.  Is it bad to modify the data at nla_data()?


--- 
a/kernel/taskstats.c~per-task-delay-accounting-taskstats-interface-control-exit-data-through-cpumasks-fix
+++ a/kernel/taskstats.c
@@ -299,6 +299,23 @@ cleanup:
return 0;
 }
 
+static int parse(struct nlattr *na, cpumask_t *mask)
+{
+   char *data;
+   int len;
+
+   if (na == NULL)
+   return 1;
+   len = nla_len(na);
+   if (len  TASKSTATS_CPUMASK_MAXLEN)
+   return -E2BIG;
+   if (len  1)
+   return -EINVAL;
+   data = nla_data(na);
+   data[len - 1] = '\0';
+   return cpulist_parse(data, *mask);
+}
+
 static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 {
int rc = 0;
@@ -309,27 +326,17 @@ static int taskstats_user_cmd(struct sk_
struct nlattr *na;
cpumask_t mask;
 
-   if (info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) {
-   na = info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
-   if (nla_len(na)  TASKSTATS_CPUMASK_MAXLEN)
-   return -E2BIG;
-   rc = cpulist_parse((char *)nla_data(na), mask);
-   if (rc)
-   return rc;
-   rc = add_del_listener(info-snd_pid, mask, REGISTER);
+   rc = parse(info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask);
+   if (rc  0)
return rc;
-   }
+   if (rc == 0)
+   return add_del_listener(info-snd_pid, mask, REGISTER);
 
-   if (info-attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) {
-   na = info-attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK];
-   if (nla_len(na)  TASKSTATS_CPUMASK_MAXLEN)
-   return -E2BIG;
-   rc = cpulist_parse((char *)nla_data(na), mask);
-   if (rc)
-   return rc;
-   rc = add_del_listener(info-snd_pid, mask, DEREGISTER);
+   rc = parse(info-attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask);
+   if (rc  0)
return rc;
-   }
+   if (rc == 0)
+   return add_del_listener(info-snd_pid, mask, DEREGISTER);
 
/*
 * Size includes space for nested attributes
_

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]

2006-07-06 Thread Shailabh Nagar
Andrew Morton wrote:
 Thomas Graf [EMAIL PROTECTED] wrote:
 
* Shailabh Nagar [EMAIL PROTECTED] 2006-07-06 07:37

@@ -37,9 +45,26 @@ static struct nla_policy taskstats_cmd_g
 __read_mostly = {
 [TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
 [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
+[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
+[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};

+na = info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
+if (nla_len(na)  TASKSTATS_CPUMASK_MAXLEN)
+return -E2BIG;
+rc = cpulist_parse((char *)nla_data(na), mask);

This isn't safe, the data in the attribute is not guaranteed to be
NUL terminated. Still it's probably me to blame for not making
this more obvious in the API.

 
 
 Thanks, that was an unpleasant bug.
 
 
I've attached a patch below extending the API to make it easier
for interfaces using NUL termianted strings,
 
 
 In the interests of keeping this work decoupled from netlink enhancements
 I'd propose the below.  

The patch looks good. I was also thinking of simply modifying the input
to have the null termination and modify later when netlink provides
generic support.


 Is it bad to modify the data at nla_data()?
 
 
 --- 
 a/kernel/taskstats.c~per-task-delay-accounting-taskstats-interface-control-exit-data-through-cpumasks-fix
 +++ a/kernel/taskstats.c
 @@ -299,6 +299,23 @@ cleanup:
   return 0;
  }
  
 +static int parse(struct nlattr *na, cpumask_t *mask)
 +{
 + char *data;
 + int len;
 +
 + if (na == NULL)
 + return 1;
 + len = nla_len(na);
 + if (len  TASKSTATS_CPUMASK_MAXLEN)
 + return -E2BIG;
 + if (len  1)
 + return -EINVAL;
 + data = nla_data(na);
 + data[len - 1] = '\0';
 + return cpulist_parse(data, *mask);
 +}
 +
  static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
  {
   int rc = 0;
 @@ -309,27 +326,17 @@ static int taskstats_user_cmd(struct sk_
   struct nlattr *na;
   cpumask_t mask;
  
 - if (info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) {
 - na = info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
 - if (nla_len(na)  TASKSTATS_CPUMASK_MAXLEN)
 - return -E2BIG;
 - rc = cpulist_parse((char *)nla_data(na), mask);
 - if (rc)
 - return rc;
 - rc = add_del_listener(info-snd_pid, mask, REGISTER);
 + rc = parse(info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask);
 + if (rc  0)
   return rc;
 - }
 + if (rc == 0)
 + return add_del_listener(info-snd_pid, mask, REGISTER);
  
 - if (info-attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) {
 - na = info-attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK];
 - if (nla_len(na)  TASKSTATS_CPUMASK_MAXLEN)
 - return -E2BIG;
 - rc = cpulist_parse((char *)nla_data(na), mask);
 - if (rc)
 - return rc;
 - rc = add_del_listener(info-snd_pid, mask, DEREGISTER);
 + rc = parse(info-attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask);
 + if (rc  0)
   return rc;
 - }
 + if (rc == 0)
 + return add_del_listener(info-snd_pid, mask, DEREGISTER);
  
   /*
* Size includes space for nested attributes
 _
 

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]

2006-07-06 Thread Thomas Graf
* Andrew Morton [EMAIL PROTECTED] 2006-07-06 14:48
 In the interests of keeping this work decoupled from netlink enhancements
 I'd propose the below.  Is it bad to modify the data at nla_data()?

Yes, it points into a skb data buffer which may be shared by sitting
on other queues if the message is to be broadcasted. In this case it
would be harmless but the policy is to leave it unmodified. Otherwise
I agree it's better to move forward and not wait for the API change.

 --- 
 a/kernel/taskstats.c~per-task-delay-accounting-taskstats-interface-control-exit-data-through-cpumasks-fix
 +++ a/kernel/taskstats.c
 @@ -299,6 +299,23 @@ cleanup:
   return 0;
  }
  
 +static int parse(struct nlattr *na, cpumask_t *mask)
 +{
 + char *data;
 + int len;
 +
 + if (na == NULL)
 + return 1;
 + len = nla_len(na);
 + if (len  TASKSTATS_CPUMASK_MAXLEN)
 + return -E2BIG;
 + if (len  1)
 + return -EINVAL;
 + data = nla_data(na);
 + data[len - 1] = '\0';
 + return cpulist_parse(data, *mask);
 +}

Usually this is done by using strlcpy:

Example fro genetlink.c
if (info-attrs[CTRL_ATTR_FAMILY_NAME]) {
char name[GENL_NAMSIZ];

if (nla_strlcpy(name, info-attrs[CTRL_ATTR_FAMILY_NAME],
GENL_NAMSIZ) = GENL_NAMSIZ)
goto errout;

res = genl_family_find_byname(name);
}
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]

2006-07-06 Thread Andrew Morton
Shailabh Nagar [EMAIL PROTECTED] wrote:

 Andrew Morton wrote:
  Thomas Graf [EMAIL PROTECTED] wrote:
  
 * Shailabh Nagar [EMAIL PROTECTED] 2006-07-06 07:37
 
 @@ -37,9 +45,26 @@ static struct nla_policy taskstats_cmd_g
  __read_mostly = {
[TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
[TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 },
 +  [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
 +  [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
 
 +  na = info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
 +  if (nla_len(na)  TASKSTATS_CPUMASK_MAXLEN)
 +  return -E2BIG;
 +  rc = cpulist_parse((char *)nla_data(na), mask);
 
 This isn't safe, the data in the attribute is not guaranteed to be
 NUL terminated. Still it's probably me to blame for not making
 this more obvious in the API.
 
  
  
  Thanks, that was an unpleasant bug.
  
  
 I've attached a patch below extending the API to make it easier
 for interfaces using NUL termianted strings,
  
  
  In the interests of keeping this work decoupled from netlink enhancements
  I'd propose the below.  
 
 The patch looks good. I was also thinking of simply modifying the input
 to have the null termination and modify later when netlink provides
 generic support.
 
 

Yup.  Thomas, what's the testing status of the netlink patch you sent?  Should I
queue it up and start plagueing people with it?

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks]

2006-07-06 Thread Andrew Morton
Thomas Graf [EMAIL PROTECTED] wrote:

 * Andrew Morton [EMAIL PROTECTED] 2006-07-06 14:48
  In the interests of keeping this work decoupled from netlink enhancements
  I'd propose the below.  Is it bad to modify the data at nla_data()?
 
 Yes, it points into a skb data buffer which may be shared by sitting
 on other queues if the message is to be broadcasted. In this case it
 would be harmless but the policy is to leave it unmodified.

Yup, sleazy-but-safe.

 
  --- 
  a/kernel/taskstats.c~per-task-delay-accounting-taskstats-interface-control-exit-data-through-cpumasks-fix
  +++ a/kernel/taskstats.c
  @@ -299,6 +299,23 @@ cleanup:
  return 0;
   }
   
  +static int parse(struct nlattr *na, cpumask_t *mask)
  +{
  +   char *data;
  +   int len;
  +
  +   if (na == NULL)
  +   return 1;
  +   len = nla_len(na);
  +   if (len  TASKSTATS_CPUMASK_MAXLEN)
  +   return -E2BIG;
  +   if (len  1)
  +   return -EINVAL;
  +   data = nla_data(na);
  +   data[len - 1] = '\0';
  +   return cpulist_parse(data, *mask);
  +}
 
 Usually this is done by using strlcpy:

hm, nla_strlcpy() looks more complex than it needs to be.  We really need
nla_kstrndup() ;)

Oh well.  This?


diff -puN 
kernel/taskstats.c~per-task-delay-accounting-taskstats-interface-control-exit-data-through-cpumasks-fix
 kernel/taskstats.c
--- 
a/kernel/taskstats.c~per-task-delay-accounting-taskstats-interface-control-exit-data-through-cpumasks-fix
+++ a/kernel/taskstats.c
@@ -299,6 +299,28 @@ cleanup:
return 0;
 }
 
+static int parse(struct nlattr *na, cpumask_t *mask)
+{
+   char *data;
+   int len;
+   int ret;
+
+   if (na == NULL)
+   return 1;
+   len = nla_len(na);
+   if (len  TASKSTATS_CPUMASK_MAXLEN)
+   return -E2BIG;
+   if (len  1)
+   return -EINVAL;
+   data = kmalloc(len, GFP_KERNEL);
+   if (!data)
+   return -ENOMEM;
+   nla_strlcpy(data, na, len);
+   ret = cpulist_parse(data, *mask);
+   kfree(data);
+   return ret;
+}
+
 static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 {
int rc = 0;
@@ -309,27 +331,17 @@ static int taskstats_user_cmd(struct sk_
struct nlattr *na;
cpumask_t mask;
 
-   if (info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) {
-   na = info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK];
-   if (nla_len(na)  TASKSTATS_CPUMASK_MAXLEN)
-   return -E2BIG;
-   rc = cpulist_parse((char *)nla_data(na), mask);
-   if (rc)
-   return rc;
-   rc = add_del_listener(info-snd_pid, mask, REGISTER);
+   rc = parse(info-attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask);
+   if (rc  0)
return rc;
-   }
+   if (rc == 0)
+   return add_del_listener(info-snd_pid, mask, REGISTER);
 
-   if (info-attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) {
-   na = info-attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK];
-   if (nla_len(na)  TASKSTATS_CPUMASK_MAXLEN)
-   return -E2BIG;
-   rc = cpulist_parse((char *)nla_data(na), mask);
-   if (rc)
-   return rc;
-   rc = add_del_listener(info-snd_pid, mask, DEREGISTER);
+   rc = parse(info-attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask);
+   if (rc  0)
return rc;
-   }
+   if (rc == 0)
+   return add_del_listener(info-snd_pid, mask, DEREGISTER);
 
/*
 * Size includes space for nested attributes
_

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html