Re: [PATCH v2 7/7] clone4: Add a CLONE_FD flag to get task exit notification via fd

2015-04-06 Thread Josh Triplett
On Mon, Apr 06, 2015 at 05:30:35PM +0900, Sergey Senozhatsky wrote:
> On (03/15/15 01:00), Josh Triplett wrote:
> [..]
> > +
> > +/* Handle the CLONE_FD case for copy_process. */
> > +int clonefd_do_clone(u64 clone_flags, struct task_struct *p,
> > +struct clone4_args *args, struct clonefd_setup *setup)
> > +{
> > +   int flags;
> > +   struct file *file;
> > +   int fd;
> > +
> > +   p->clonefd = !!(clone_flags & CLONE_FD);
> > +   if (!p->clonefd)
> > +   return 0;
> > +
> > +   if (args->clonefd_flags & ~(O_CLOEXEC | O_NONBLOCK))
> > +   return -EINVAL;
> > +
> > +   init_waitqueue_head(&p->clonefd_wqh);
> > +
> > +   get_task_struct(p);
> > +   flags = O_RDONLY | FMODE_ATOMIC_POS | args->clonefd_flags;
> > +   file = anon_inode_getfile("[process]", &clonefd_fops, p, flags);
> > +   if (IS_ERR(file)) {
> > +   put_task_struct(p);
> > +   return PTR_ERR(file);
> > +   }
> > +
> > +   fd = get_unused_fd_flags(flags);
> > +   if (fd < 0) {
> 
> + put_task_struct(p); ?

No, once anon_inode_getfile has succeeded, the file owns the reference
to the task_struct, so fput(file) will call the release function which
calls put_task_struct.  Only the failure case for anon_inode_getfile
needs to call put_task_struct directly.

> > +   fput(file);
> > +   return fd;
> > +   }

- Josh Triplett
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 7/7] clone4: Add a CLONE_FD flag to get task exit notification via fd

2015-04-06 Thread Sergey Senozhatsky
On (03/15/15 01:00), Josh Triplett wrote:
[..]
> +
> +/* Handle the CLONE_FD case for copy_process. */
> +int clonefd_do_clone(u64 clone_flags, struct task_struct *p,
> +  struct clone4_args *args, struct clonefd_setup *setup)
> +{
> + int flags;
> + struct file *file;
> + int fd;
> +
> + p->clonefd = !!(clone_flags & CLONE_FD);
> + if (!p->clonefd)
> + return 0;
> +
> + if (args->clonefd_flags & ~(O_CLOEXEC | O_NONBLOCK))
> + return -EINVAL;
> +
> + init_waitqueue_head(&p->clonefd_wqh);
> +
> + get_task_struct(p);
> + flags = O_RDONLY | FMODE_ATOMIC_POS | args->clonefd_flags;
> + file = anon_inode_getfile("[process]", &clonefd_fops, p, flags);
> + if (IS_ERR(file)) {
> + put_task_struct(p);
> + return PTR_ERR(file);
> + }
> +
> + fd = get_unused_fd_flags(flags);
> + if (fd < 0) {

+   put_task_struct(p); ?

> + fput(file);
> + return fd;
> + }
> +
> + setup->fd = fd;
> + setup->file = file;
> + return 0;
> +}
[..]

-ss
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 7/7] clone4: Add a CLONE_FD flag to get task exit notification via fd

2015-04-06 Thread Sergey Senozhatsky
On (03/15/15 01:00), Josh Triplett wrote:
[..]
> +
> +/* Handle the CLONE_FD case for copy_process. */
> +int clonefd_do_clone(u64 clone_flags, struct task_struct *p,
> +  struct clone4_args *args, struct clonefd_setup *setup)
> +{
> + int flags;
> + struct file *file;
> + int fd;
> +
> + p->clonefd = !!(clone_flags & CLONE_FD);
> + if (!p->clonefd)
> + return 0;
> +
> + if (args->clonefd_flags & ~(O_CLOEXEC | O_NONBLOCK))
> + return -EINVAL;
> +
> + init_waitqueue_head(&p->clonefd_wqh);
> +
> + get_task_struct(p);
> + flags = O_RDONLY | FMODE_ATOMIC_POS | args->clonefd_flags;
> + file = anon_inode_getfile("[process]", &clonefd_fops, p, flags);
> + if (IS_ERR(file)) {
> + put_task_struct(p);
> + return PTR_ERR(file);
> + }
> +
> + fd = get_unused_fd_flags(flags);
> + if (fd < 0) {

+   put_task_struct(p); ?

> + fput(file);
> + return fd;
> + }
> +
> + setup->fd = fd;
> + setup->file = file;
> + return 0;
> +}
[..]

-ss
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 7/7] clone4: Add a CLONE_FD flag to get task exit notification via fd

2015-04-06 Thread Josh Triplett
On Mon, Apr 06, 2015 at 05:30:35PM +0900, Sergey Senozhatsky wrote:
> On (03/15/15 01:00), Josh Triplett wrote:
> [..]
> > +
> > +/* Handle the CLONE_FD case for copy_process. */
> > +int clonefd_do_clone(u64 clone_flags, struct task_struct *p,
> > +struct clone4_args *args, struct clonefd_setup *setup)
> > +{
> > +   int flags;
> > +   struct file *file;
> > +   int fd;
> > +
> > +   p->clonefd = !!(clone_flags & CLONE_FD);
> > +   if (!p->clonefd)
> > +   return 0;
> > +
> > +   if (args->clonefd_flags & ~(O_CLOEXEC | O_NONBLOCK))
> > +   return -EINVAL;
> > +
> > +   init_waitqueue_head(&p->clonefd_wqh);
> > +
> > +   get_task_struct(p);
> > +   flags = O_RDONLY | FMODE_ATOMIC_POS | args->clonefd_flags;
> > +   file = anon_inode_getfile("[process]", &clonefd_fops, p, flags);
> > +   if (IS_ERR(file)) {
> > +   put_task_struct(p);
> > +   return PTR_ERR(file);
> > +   }
> > +
> > +   fd = get_unused_fd_flags(flags);
> > +   if (fd < 0) {
> 
> + put_task_struct(p); ?

No, once anon_inode_getfile has succeeded, the file owns the reference
to the task_struct, so fput(file) will call the release function which
calls put_task_struct.  Only the failure case for anon_inode_getfile
needs to call put_task_struct directly.

> > +   fput(file);
> > +   return fd;
> > +   }

- Josh Triplett
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 7/7] clone4: Add a CLONE_FD flag to get task exit notification via fd

2015-03-25 Thread Josh Triplett
On Mon, Mar 23, 2015 at 05:38:45PM +0000, David Drysdale wrote:
> On Sun, Mar 15, 2015 at 8:00 AM, Josh Triplett <j...@joshtriplett.org> wrote:
> > diff --git a/include/linux/sched.h b/include/linux/sched.h
> > index 9daa017..1dc680b 100644
> > --- a/include/linux/sched.h
> > +++ b/include/linux/sched.h
> > @@ -1374,6 +1374,11 @@ struct task_struct {
> >
> > unsigned autoreap:1; /* Do not become a zombie on exit */
> >
> > +#ifdef CONFIG_CLONEFD
> > +   unsigned clonefd:1; /* Notify clonefd_wqh on exit */
> > +   wait_queue_head_t clonefd_wqh;
> > +#endif
> > +
> > unsigned long atomic_flags; /* Flags needing atomic access. */
> >
> > struct restart_block restart_block;
> 
> Idle thought: are there any concerns about the occupancy
> impact of adding a wait_queue_head to every task_struct,
> whether it has a clonefd or not?
> 
> I guess we could reduce the size somewhat by just
> storing a struct file *clonefd_file in the task, and then have
> a separate structure (with the wqh and a task_struct*) referenced
> by file->private_data.  Not sure whether the added complication
> would be worthwhile, though.

My original patches did exactly that (minus the reference back to the
task_struct).  However, there are a couple of problems with that
approach.  First, it assumes that a task_struct has only a single file
referencing it, but in the future I'd like to support obtaining a
clonefd for an existing task.  Second, the task_struct really shouldn't
have a reference to the actual struct file, when it only needs the
wait_queue_head_t.

Also, AFAICT a wait_queue_head_t is normally (in the absence of kernel
lock debugging options) the size of two pointers.  Adding an indirection
and an extra allocation to change that to the size of one pointer seems
iffy, especially when looking at the rest of what's directly in
task_struct that's far larger.

> > --- /dev/null
> > +++ b/kernel/clonefd.c
> > @@ -0,0 +1,121 @@
> > +/*
> > + * Support functions for CLONE_FD
> > + *
> > + * Copyright (c) 2015 Intel Corporation
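> > + * Original authors: Josh Triplett <j...@joshtriplett.org>
> > + *   Thiago Macieira <thi...@macieira.org>
> > + */
> > +#include <linux/anon_inodes.h>
> > +#include <linux/file.h>
> > +#include <linux/fs.h>
> > +#include <linux/poll.h>
> > +#include <linux/slab.h>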
> > +#include "clonefd.h"
> > +
> > +static int clonefd_release(struct inode *inode, struct file *file)
> > +{
> > +   put_task_struct(file->private_data);
> > +   return 0;
> > +}
> > +
> > +static unsigned int clonefd_poll(struct file *file, poll_table *wait)
> > +{
> > +   struct task_struct *p = file->private_data;
> > +   poll_wait(file, &p->clonefd_wqh, wait);
> > +   return p->exit_state ? (POLLIN | POLLRDNORM | POLLHUP) : 0;
> > +}
> > +
> > +static ssize_t clonefd_read(struct file *file, char __user *buf, size_t 
> > count, loff_t *ppos)
> > +{
> > +   struct task_struct *p = file->private_data;
> > +   int ret = 0;
> > +
> > +   /* EOF after first read */
> > +   if (*ppos)
> > +   return 0;
> > +
> > +   if (file->f_flags & O_NONBLOCK)
> > +   ret = -EAGAIN;
> > +   else
> > +   ret = wait_event_interruptible(p->clonefd_wqh, 
> > p->exit_state);
> > +
> > +   if (p->exit_state) {
> > +   struct clonefd_info info = {};
> > +   cputime_t utime, stime;
> > +   task_exit_code_status(p->exit_code, &info.code, 
> > &info.status);
> > +   info.code &= ~__SI_MASK;
> > +   task_cputime(p, &utime, &stime);
> > +   info.utime = cputime_to_clock_t(utime + p->signal->utime);
> > +   info.stime = cputime_to_clock_t(stime + p->signal->stime);
> > +   ret = simple_read_from_buffer(buf, count, ppos, &info, 
> > sizeof(info));
> > +   }
> > +   return ret;
> > +}
> > +
> > +static struct file_operations clonefd_fops = {
> > +   .release = clonefd_release,
> > +   .poll = clonefd_poll,
> > +   .read = clonefd_read,
> > +   .llseek = no_llseek,
> > +};
> 
> It might be nice to include a show_fdinfo() implementation that shows
> (say) the pid that the clonefd refers to.  E.g. something like:
> 
> static void clonefd_show_fdinfo(struct seq_file *m, struct file *file)
> {
> struct task_struct *p = file->private_data;
> 
> seq_printf(m, "tid:\t%d\n", task_tgid_vnr(p));
> }

I thought about that, but that would add a couple of additional ifdefs
(CONFIG_PROC_FS), for an informational file of minimal value.  More
importantly, I don't want to add that until after adding an ioctl or
similar to programmatically obtain the pid from a clonefd; otherwise,
someone might try to use fdinfo as the "API" to do so, which would be
all kinds of awful.

So I'd prefer to add fdinfo in a future extension of clonefd, rather
than in the initial patch series.

> > +
> > +/* Do process exit notification for clonefd. */
> > +void clonefd_do_notify(struct task_struct *p)
> > +{
> > +   if (p->clonefd)
> > +   wake_up_all(&p->clonefd_wqh);
> > +}
> > +
> > +/* Handle the CLONE_FD case for 

Re: [PATCH v2 7/7] clone4: Add a CLONE_FD flag to get task exit notification via fd

2015-03-25 Thread Josh Triplett
On Mon, Mar 23, 2015 at 05:38:45PM +0000, David Drysdale wrote:
> On Sun, Mar 15, 2015 at 8:00 AM, Josh Triplett <j...@joshtriplett.org> wrote:
> > diff --git a/include/linux/sched.h b/include/linux/sched.h
> > index 9daa017..1dc680b 100644
> > --- a/include/linux/sched.h
> > +++ b/include/linux/sched.h
> > @@ -1374,6 +1374,11 @@ struct task_struct {
> >
> > unsigned autoreap:1; /* Do not become a zombie on exit */
> >
> > +#ifdef CONFIG_CLONEFD
> > +   unsigned clonefd:1; /* Notify clonefd_wqh on exit */
> > +   wait_queue_head_t clonefd_wqh;
> > +#endif
> > +
> > unsigned long atomic_flags; /* Flags needing atomic access. */
> >
> > struct restart_block restart_block;
> 
> Idle thought: are there any concerns about the occupancy
> impact of adding a wait_queue_head to every task_struct,
> whether it has a clonefd or not?
> 
> I guess we could reduce the size somewhat by just
> storing a struct file *clonefd_file in the task, and then have
> a separate structure (with the wqh and a task_struct*) referenced
> by file->private_data.  Not sure whether the added complication
> would be worthwhile, though.

My original patches did exactly that (minus the reference back to the
task_struct).  However, there are a couple of problems with that
approach.  First, it assumes that a task_struct has only a single file
referencing it, but in the future I'd like to support obtaining a
clonefd for an existing task.  Second, the task_struct really shouldn't
have a reference to the actual struct file, when it only needs the
wait_queue_head_t.

Also, AFAICT a wait_queue_head_t is normally (in the absence of kernel
lock debugging options) the size of two pointers.  Adding an indirection
and an extra allocation to change that to the size of one pointer seems
iffy, especially when looking at the rest of what's directly in
task_struct that's far larger.

> > --- /dev/null
> > +++ b/kernel/clonefd.c
> > @@ -0,0 +1,121 @@
> > +/*
> > + * Support functions for CLONE_FD
> > + *
> > + * Copyright (c) 2015 Intel Corporation
> > + * Original authors: Josh Triplett <j...@joshtriplett.org>
> > + *   Thiago Macieira <thi...@macieira.org>
> > + */
> > +#include <linux/anon_inodes.h>
> > +#include <linux/file.h>
> > +#include <linux/fs.h>
> > +#include <linux/poll.h>
> > +#include <linux/slab.h>
> > +#include "clonefd.h"
> > +
> > +static int clonefd_release(struct inode *inode, struct file *file)
> > +{
> > +   put_task_struct(file->private_data);
> > +   return 0;
> > +}
> > +
> > +static unsigned int clonefd_poll(struct file *file, poll_table *wait)
> > +{
> > +   struct task_struct *p = file->private_data;
> > +   poll_wait(file, &p->clonefd_wqh, wait);
> > +   return p->exit_state ? (POLLIN | POLLRDNORM | POLLHUP) : 0;
> > +}
> > +
> > +static ssize_t clonefd_read(struct file *file, char __user *buf, size_t 
> > count, loff_t *ppos)
> > +{
> > +   struct task_struct *p = file->private_data;
> > +   int ret = 0;
> > +
> > +   /* EOF after first read */
> > +   if (*ppos)
> > +   return 0;
> > +
> > +   if (file->f_flags & O_NONBLOCK)
> > +   ret = -EAGAIN;
> > +   else
> > +   ret = wait_event_interruptible(p->clonefd_wqh, 
> > p->exit_state);
> > +
> > +   if (p->exit_state) {
> > +   struct clonefd_info info = {};
> > +   cputime_t utime, stime;
> > +   task_exit_code_status(p->exit_code, &info.code, 
> > &info.status);
> > +   info.code &= ~__SI_MASK;
> > +   task_cputime(p, &utime, &stime);
> > +   info.utime = cputime_to_clock_t(utime + p->signal->utime);
> > +   info.stime = cputime_to_clock_t(stime + p->signal->stime);
> > +   ret = simple_read_from_buffer(buf, count, ppos, &info, 
> > sizeof(info));
> > +   }
> > +   return ret;
> > +}
> > +
> > +static struct file_operations clonefd_fops = {
> > +   .release = clonefd_release,
> > +   .poll = clonefd_poll,
> > +   .read = clonefd_read,
> > +   .llseek = no_llseek,
> > +};
> 
> It might be nice to include a show_fdinfo() implementation that shows
> (say) the pid that the clonefd refers to.  E.g. something like:
> 
> static void clonefd_show_fdinfo(struct seq_file *m, struct file *file)
> {
> struct task_struct *p = file->private_data;
> 
> seq_printf(m, "tid:\t%d\n", task_tgid_vnr(p));
> }

I thought about that, but that would add a couple of additional ifdefs
(CONFIG_PROC_FS), for an informational file of minimal value.  More
importantly, I don't want to add that until after adding an ioctl or
similar to programmatically obtain the pid from a clonefd; otherwise,
someone might try to use fdinfo as the "API" to do so, which would be
all kinds of awful.

So I'd prefer to add fdinfo in a future extension of clonefd, rather
than in the initial patch series.

> > +
> > +/* Do process exit notification for clonefd. */
> > +void clonefd_do_notify(struct task_struct *p)
> > +{
> > +   if (p->clonefd)
> > +   wake_up_all(&p->clonefd_wqh);
> > +}
> > +
> > +/* Handle the CLONE_FD case for copy_process. */
> > +int clonefd_do_clone(u64 clone_flags, struct task_struct 

Re: [PATCH v2 7/7] clone4: Add a CLONE_FD flag to get task exit notification via fd

2015-03-23 Thread David Drysdale
On Sun, Mar 15, 2015 at 8:00 AM, Josh Triplett <j...@joshtriplett.org> wrote:
> diff --git a/include/linux/compat.h b/include/linux/compat.h
> index 6c4a68d..c90df5a 100644
> --- a/include/linux/compat.h
> +++ b/include/linux/compat.h
> @@ -299,6 +299,8 @@ struct compat_clone4_args {
> compat_ulong_t stack_start;
> compat_ulong_t stack_size;
> compat_ulong_t tls;
> +   compat_uptr_t clonefd;
> +   u32 clonefd_flags;
>  };
>
>  struct compat_statfs;
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 9daa017..1dc680b 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1374,6 +1374,11 @@ struct task_struct {
>
> unsigned autoreap:1; /* Do not become a zombie on exit */
>
> +#ifdef CONFIG_CLONEFD
> +   unsigned clonefd:1; /* Notify clonefd_wqh on exit */
> +   wait_queue_head_t clonefd_wqh;
> +#endif
> +
> unsigned long atomic_flags; /* Flags needing atomic access. */
>
> struct restart_block restart_block;

Idle thought: are there any concerns about the occupancy
impact of adding a wait_queue_head to every task_struct,
whether it has a clonefd or not?

I guess we could reduce the size somewhat by just
storing a struct file *clonefd_file in the task, and then have
a separate structure (with the wqh and a task_struct*) referenced
by file->private_data.  Not sure whether the added complication
would be worthwhile, though.

> diff --git a/kernel/clonefd.c b/kernel/clonefd.c
> new file mode 100644
> index 0000000..eac560c
> --- /dev/null
> +++ b/kernel/clonefd.c
> @@ -0,0 +1,121 @@
> +/*
> + * Support functions for CLONE_FD
> + *
> + * Copyright (c) 2015 Intel Corporation
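> + * Original authors: Josh Triplett <j...@joshtriplett.org>
> + *   Thiago Macieira <thi...@macieira.org>
> + */
> +#include <linux/anon_inodes.h>
> +#include <linux/file.h>
> +#include <linux/fs.h>
> +#include <linux/poll.h>
> +#include <linux/slab.h>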
> +#include "clonefd.h"
> +
> +static int clonefd_release(struct inode *inode, struct file *file)
> +{
> +   put_task_struct(file->private_data);
> +   return 0;
> +}
> +
> +static unsigned int clonefd_poll(struct file *file, poll_table *wait)
> +{
> +   struct task_struct *p = file->private_data;
> +   poll_wait(file, &p->clonefd_wqh, wait);
> +   return p->exit_state ? (POLLIN | POLLRDNORM | POLLHUP) : 0;
> +}
> +
> +static ssize_t clonefd_read(struct file *file, char __user *buf, size_t 
> count, loff_t *ppos)
> +{
> +   struct task_struct *p = file->private_data;
> +   int ret = 0;
> +
> +   /* EOF after first read */
> +   if (*ppos)
> +   return 0;
> +
> +   if (file->f_flags & O_NONBLOCK)
> +   ret = -EAGAIN;
> +   else
> +   ret = wait_event_interruptible(p->clonefd_wqh, p->exit_state);
> +
> +   if (p->exit_state) {
> +   struct clonefd_info info = {};
> +   cputime_t utime, stime;
> +   task_exit_code_status(p->exit_code, &info.code, &info.status);
> +   info.code &= ~__SI_MASK;
> +   task_cputime(p, &utime, &stime);
> +   info.utime = cputime_to_clock_t(utime + p->signal->utime);
> +   info.stime = cputime_to_clock_t(stime + p->signal->stime);
> +   ret = simple_read_from_buffer(buf, count, ppos, &info, 
> sizeof(info));
> +   }
> +   return ret;
> +}
> +
> +static struct file_operations clonefd_fops = {
> +   .release = clonefd_release,
> +   .poll = clonefd_poll,
> +   .read = clonefd_read,
> +   .llseek = no_llseek,
> +};

It might be nice to include a show_fdinfo() implementation that shows
(say) the pid that the clonefd refers to.  E.g. something like:

static void clonefd_show_fdinfo(struct seq_file *m, struct file *file)
{
struct task_struct *p = file->private_data;

seq_printf(m, "tid:\t%d\n", task_tgid_vnr(p));
}

> +
> +/* Do process exit notification for clonefd. */
> +void clonefd_do_notify(struct task_struct *p)
> +{
> +   if (p->clonefd)
> +   wake_up_all(&p->clonefd_wqh);
> +}
> +
> +/* Handle the CLONE_FD case for copy_process. */
> +int clonefd_do_clone(u64 clone_flags, struct task_struct *p,
> +struct clone4_args *args, struct clonefd_setup *setup)
> +{
> +   int flags;
> +   struct file *file;
> +   int fd;
> +
> +   p->clonefd = !!(clone_flags & CLONE_FD);
> +   if (!p->clonefd)
> +   return 0;
> +
> +   if (args->clonefd_flags & ~(O_CLOEXEC | O_NONBLOCK))
> +   return -EINVAL;
> +

Maybe also check for (args->clonefd == NULL) in advance, and
return -EINVAL or -EFAULT?

> +   init_waitqueue_head(&p->clonefd_wqh);
> +
> +   get_task_struct(p);
> +   flags = O_RDONLY | FMODE_ATOMIC_POS | args->clonefd_flags;
> +   file = anon_inode_getfile("[process]", &clonefd_fops, p, flags);
> +   if (IS_ERR(file)) {
> +   put_task_struct(p);
> +   return PTR_ERR(file);
> +   }
> +
> +   fd = get_unused_fd_flags(flags);
> +   if (fd < 0) {
> +   fput(file);
> +   return fd;
> +   }
> +

Re: [PATCH v2 7/7] clone4: Add a CLONE_FD flag to get task exit notification via fd

2015-03-23 Thread David Drysdale
On Sun, Mar 15, 2015 at 8:00 AM, Josh Triplett <j...@joshtriplett.org> wrote:
> diff --git a/include/linux/compat.h b/include/linux/compat.h
> index 6c4a68d..c90df5a 100644
> --- a/include/linux/compat.h
> +++ b/include/linux/compat.h
> @@ -299,6 +299,8 @@ struct compat_clone4_args {
> compat_ulong_t stack_start;
> compat_ulong_t stack_size;
> compat_ulong_t tls;
> +   compat_uptr_t clonefd;
> +   u32 clonefd_flags;
>  };
>
>  struct compat_statfs;
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 9daa017..1dc680b 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1374,6 +1374,11 @@ struct task_struct {
>
> unsigned autoreap:1; /* Do not become a zombie on exit */
>
> +#ifdef CONFIG_CLONEFD
> +   unsigned clonefd:1; /* Notify clonefd_wqh on exit */
> +   wait_queue_head_t clonefd_wqh;
> +#endif
> +
> unsigned long atomic_flags; /* Flags needing atomic access. */
>
> struct restart_block restart_block;

Idle thought: are there any concerns about the occupancy
impact of adding a wait_queue_head to every task_struct,
whether it has a clonefd or not?

I guess we could reduce the size somewhat by just
storing a struct file *clonefd_file in the task, and then have
a separate structure (with the wqh and a task_struct*) referenced
by file->private_data.  Not sure whether the added complication
would be worthwhile, though.

> diff --git a/kernel/clonefd.c b/kernel/clonefd.c
> new file mode 100644
> index 0000000..eac560c
> --- /dev/null
> +++ b/kernel/clonefd.c
> @@ -0,0 +1,121 @@
> +/*
> + * Support functions for CLONE_FD
> + *
> + * Copyright (c) 2015 Intel Corporation
> + * Original authors: Josh Triplett <j...@joshtriplett.org>
> + *   Thiago Macieira <thi...@macieira.org>
> + */
> +#include <linux/anon_inodes.h>
> +#include <linux/file.h>
> +#include <linux/fs.h>
> +#include <linux/poll.h>
> +#include <linux/slab.h>
> +#include "clonefd.h"
> +
> +static int clonefd_release(struct inode *inode, struct file *file)
> +{
> +   put_task_struct(file->private_data);
> +   return 0;
> +}
> +
> +static unsigned int clonefd_poll(struct file *file, poll_table *wait)
> +{
> +   struct task_struct *p = file->private_data;
> +   poll_wait(file, &p->clonefd_wqh, wait);
> +   return p->exit_state ? (POLLIN | POLLRDNORM | POLLHUP) : 0;
> +}
> +
> +static ssize_t clonefd_read(struct file *file, char __user *buf, size_t 
> count, loff_t *ppos)
> +{
> +   struct task_struct *p = file->private_data;
> +   int ret = 0;
> +
> +   /* EOF after first read */
> +   if (*ppos)
> +   return 0;
> +
> +   if (file->f_flags & O_NONBLOCK)
> +   ret = -EAGAIN;
> +   else
> +   ret = wait_event_interruptible(p->clonefd_wqh, p->exit_state);
> +
> +   if (p->exit_state) {
> +   struct clonefd_info info = {};
> +   cputime_t utime, stime;
> +   task_exit_code_status(p->exit_code, &info.code, &info.status);
> +   info.code &= ~__SI_MASK;
> +   task_cputime(p, &utime, &stime);
> +   info.utime = cputime_to_clock_t(utime + p->signal->utime);
> +   info.stime = cputime_to_clock_t(stime + p->signal->stime);
> +   ret = simple_read_from_buffer(buf, count, ppos, &info, 
> sizeof(info));
> +   }
> +   return ret;
> +}
> +
> +static struct file_operations clonefd_fops = {
> +   .release = clonefd_release,
> +   .poll = clonefd_poll,
> +   .read = clonefd_read,
> +   .llseek = no_llseek,
> +};

It might be nice to include a show_fdinfo() implementation that shows
(say) the pid that the clonefd refers to.  E.g. something like:

static void clonefd_show_fdinfo(struct seq_file *m, struct file *file)
{
struct task_struct *p = file->private_data;

seq_printf(m, "tid:\t%d\n", task_tgid_vnr(p));
}

> +
> +/* Do process exit notification for clonefd. */
> +void clonefd_do_notify(struct task_struct *p)
> +{
> +   if (p->clonefd)
> +   wake_up_all(&p->clonefd_wqh);
> +}
> +
> +/* Handle the CLONE_FD case for copy_process. */
> +int clonefd_do_clone(u64 clone_flags, struct task_struct *p,
> +struct clone4_args *args, struct clonefd_setup *setup)
> +{
> +   int flags;
> +   struct file *file;
> +   int fd;
> +
> +   p->clonefd = !!(clone_flags & CLONE_FD);
> +   if (!p->clonefd)
> +   return 0;
> +
> +   if (args->clonefd_flags & ~(O_CLOEXEC | O_NONBLOCK))
> +   return -EINVAL;
> +

Maybe also check for (args->clonefd == NULL) in advance, and
return -EINVAL or -EFAULT?

> +   init_waitqueue_head(&p->clonefd_wqh);
> +
> +   get_task_struct(p);
> +   flags = O_RDONLY | FMODE_ATOMIC_POS | args->clonefd_flags;
> +   file = anon_inode_getfile("[process]", &clonefd_fops, p, flags);
> +   if (IS_ERR(file)) {
> +   put_task_struct(p);
> +   return PTR_ERR(file);
> +   }
> +
> +   fd = get_unused_fd_flags(flags);
> +   if (fd < 0) {
> +   fput(file);
> +   return fd;
> +   }