Enable busy waiting for pipes. pipe_busy_wait() is called when the pipe is empty or full, and spins for the specified number of microseconds. wake_up_busy_poll() is called when data is written or read, to signal any busy-waiting threads. A tunable, pipe_busy_poll, is introduced to enable or disable busy waiting via /proc; its value specifies how long to spin, in microseconds.
Signed-off-by: subhra mazumdar <subhra.mazum...@oracle.com> --- fs/pipe.c | 58 +++++++++++++++++++++++++++++++++++++++++++++-- include/linux/pipe_fs_i.h | 1 + kernel/sysctl.c | 7 ++++++ 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 97e5be8..03ce76a 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -44,6 +44,7 @@ unsigned int pipe_min_size = PAGE_SIZE; */ unsigned long pipe_user_pages_hard; unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR; +unsigned int pipe_busy_poll; /* * We use a start+len construction, which provides full use of the @@ -122,6 +123,35 @@ void pipe_wait(struct pipe_inode_info *pipe) pipe_lock(pipe); } +void pipe_busy_wait(struct pipe_inode_info *pipe) +{ + unsigned long wait_flag = pipe->pipe_wait_flag; + unsigned long start_time = pipe_busy_loop_current_time(); + + pipe_unlock(pipe); + preempt_disable(); + for (;;) { + if (pipe->pipe_wait_flag > wait_flag) { + preempt_enable(); + pipe_lock(pipe); + return; + } + if (pipe_busy_loop_timeout(pipe, start_time)) + break; + cpu_relax(); + } + preempt_enable(); + pipe_lock(pipe); + if (pipe->pipe_wait_flag > wait_flag) + return; + pipe_wait(pipe); +} + +void wake_up_busy_poll(struct pipe_inode_info *pipe) +{ + pipe->pipe_wait_flag++; +} + static void anon_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { @@ -254,6 +284,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) struct pipe_inode_info *pipe = filp->private_data; int do_wakeup; ssize_t ret; + unsigned int poll = pipe->pipe_ll_usec; /* Null read succeeds. 
*/ if (unlikely(total_len == 0)) @@ -331,11 +362,18 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) break; } if (do_wakeup) { + if (poll) + wake_up_busy_poll(pipe); wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } - pipe_wait(pipe); + if (poll) + pipe_busy_wait(pipe); + else + pipe_wait(pipe); } + if (poll && do_wakeup) + wake_up_busy_poll(pipe); __pipe_unlock(pipe); /* Signal writers asynchronously that there is more room. */ @@ -362,6 +400,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) int do_wakeup = 0; size_t total_len = iov_iter_count(from); ssize_t chars; + unsigned int poll = pipe->pipe_ll_usec; /* Null write succeeds. */ if (unlikely(total_len == 0)) @@ -467,15 +506,22 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) break; } if (do_wakeup) { + if (poll) + wake_up_busy_poll(pipe); wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); do_wakeup = 0; } pipe->waiting_writers++; - pipe_wait(pipe); + if (poll) + pipe_busy_wait(pipe); + else + pipe_wait(pipe); pipe->waiting_writers--; } out: + if (poll && do_wakeup) + wake_up_busy_poll(pipe); __pipe_unlock(pipe); if (do_wakeup) { wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); @@ -564,6 +610,7 @@ static int pipe_release(struct inode *inode, struct file *file) { struct pipe_inode_info *pipe = file->private_data; + unsigned int poll = pipe->pipe_ll_usec; __pipe_lock(pipe); if (file->f_mode & FMODE_READ) @@ -572,6 +619,8 @@ pipe_release(struct inode *inode, struct file *file) pipe->writers--; if (pipe->readers || pipe->writers) { + if (poll) + wake_up_busy_poll(pipe); wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); @@ -840,6 +889,7 @@ SYSCALL_DEFINE2(pipe2, int __user *, 
fildes, int, flags) struct file *files[2]; int fd[2]; int error; + struct pipe_inode_info *pipe; error = __do_pipe_flags(fd, files, flags); if (!error) { @@ -853,6 +903,10 @@ SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) fd_install(fd[0], files[0]); fd_install(fd[1], files[1]); } + pipe = files[0]->private_data; + pipe->pipe_ll_usec = pipe_busy_poll; + pipe = files[1]->private_data; + pipe->pipe_ll_usec = pipe_busy_poll; } return error; } diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index fdfd2a2..3b96b05 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -188,6 +188,7 @@ void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); extern unsigned int pipe_max_size, pipe_min_size; extern unsigned long pipe_user_pages_hard; extern unsigned long pipe_user_pages_soft; +extern unsigned int pipe_busy_poll; int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *); /* Drop the inode semaphore and wait for a pipe event, atomically */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d9c31bc..823bde1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1842,6 +1842,13 @@ static struct ctl_table fs_table[] = { .proc_handler = proc_doulongvec_minmax, }, { + .procname = "pipe-busy-poll", + .data = &pipe_busy_poll, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + }, + { .procname = "mount-max", .data = &sysctl_mount_max, .maxlen = sizeof(unsigned int), -- 2.9.3