Socket notifications. This patch includes socket send/recv/accept notifications. Using trivial web server based on kevent and this features instead of epoll it's performance increased more than noticebly. More details about various benchmarks and server itself (evserver_kevent.c) can be found on project's homepage.
Signed-off-by: Evgeniy Polyakov <[EMAIL PROTECTED]> diff --git a/fs/inode.c b/fs/inode.c index bf21dc6..82817b1 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -21,6 +21,7 @@ #include <linux/cdev.h> #include <linux/bootmem.h> #include <linux/inotify.h> +#include <linux/kevent.h> #include <linux/mount.h> /* @@ -164,12 +165,18 @@ static struct inode *alloc_inode(struct super_block *sb) } inode->i_private = NULL; inode->i_mapping = mapping; +#if defined CONFIG_KEVENT_SOCKET || defined CONFIG_KEVENT_PIPE + kevent_storage_init(inode, &inode->st); +#endif } return inode; } void destroy_inode(struct inode *inode) { +#if defined CONFIG_KEVENT_SOCKET || defined CONFIG_KEVENT_PIPE + kevent_storage_fini(&inode->st); +#endif BUG_ON(inode_has_buffers(inode)); security_inode_free(inode); if (inode->i_sb->s_op->destroy_inode) diff --git a/include/net/sock.h b/include/net/sock.h index 03684e7..d840399 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -49,6 +49,7 @@ #include <linux/skbuff.h> /* struct sk_buff */ #include <linux/mm.h> #include <linux/security.h> +#include <linux/kevent.h> #include <linux/filter.h> @@ -451,6 +452,21 @@ static inline int sk_stream_memory_free(struct sock *sk) extern void sk_stream_rfree(struct sk_buff *skb); +struct socket_alloc { + struct socket socket; + struct inode vfs_inode; +}; + +static inline struct socket *SOCKET_I(struct inode *inode) +{ + return &container_of(inode, struct socket_alloc, vfs_inode)->socket; +} + +static inline struct inode *SOCK_INODE(struct socket *socket) +{ + return &container_of(socket, struct socket_alloc, socket)->vfs_inode; +} + static inline void sk_stream_set_owner_r(struct sk_buff *skb, struct sock *sk) { skb->sk = sk; @@ -478,6 +494,7 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) sk->sk_backlog.tail = skb; } skb->next = NULL; + kevent_socket_notify(sk, KEVENT_SOCKET_RECV); } #define sk_wait_event(__sk, __timeo, __condition) \ @@ -679,21 +696,6 @@ static inline struct kiocb *siocb_to_kiocb(struct sock_iocb *si) return si->kiocb; } -struct socket_alloc { - struct socket socket; - struct inode vfs_inode; -}; - -static inline struct socket *SOCKET_I(struct inode *inode) -{ - return &container_of(inode, struct socket_alloc, vfs_inode)->socket; -} - -static inline struct inode *SOCK_INODE(struct socket *socket) -{ - return &container_of(socket, struct socket_alloc, socket)->vfs_inode; -} - extern void __sk_stream_mem_reclaim(struct sock *sk); extern int sk_stream_mem_schedule(struct sock *sk, int size, int kind); diff --git a/include/net/tcp.h b/include/net/tcp.h index b7d8317..2763b30 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -864,6 +864,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { wake_up_interruptible(sk->sk_sleep); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, (3 * TCP_RTO_MIN) / 4, diff --git a/kernel/kevent/kevent_socket.c b/kernel/kevent/kevent_socket.c new file mode 100644 index 0000000..d1a2701 --- /dev/null +++ b/kernel/kevent/kevent_socket.c @@ -0,0 +1,144 @@ +/* + * kevent_socket.c + * + * 2006 Copyright (c) Evgeniy Polyakov <[EMAIL PROTECTED]> + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/timer.h> +#include <linux/file.h> +#include <linux/tcp.h> +#include <linux/kevent.h> + +#include <net/sock.h> +#include <net/request_sock.h> +#include <net/inet_connection_sock.h> + +static int kevent_socket_callback(struct kevent *k) +{ + struct inode *inode = k->st->origin; + unsigned int events = SOCKET_I(inode)->ops->poll(SOCKET_I(inode)->file, SOCKET_I(inode), NULL); + + if ((events & (POLLIN | POLLRDNORM)) && (k->event.event & (KEVENT_SOCKET_RECV | KEVENT_SOCKET_ACCEPT))) + return 1; + if ((events & (POLLOUT | POLLWRNORM)) && (k->event.event & KEVENT_SOCKET_SEND)) + return 1; + if (events & (POLLERR | POLLHUP | POLLRDHUP | POLLREMOVE)) + return -1; + return 0; +} + +int kevent_socket_enqueue(struct kevent *k) +{ + struct inode *inode; + struct socket *sock; + int err = -EBADF; + + sock = sockfd_lookup(k->event.id.raw[0], &err); + if (!sock) + goto err_out_exit; + + inode = igrab(SOCK_INODE(sock)); + if (!inode) + goto err_out_fput; + + err = kevent_storage_enqueue(&inode->st, k); + if (err) + goto err_out_iput; + + if (k->event.req_flags & KEVENT_REQ_ALWAYS_QUEUE) { + kevent_requeue(k); + err = 0; + } else { + err = k->callbacks.callback(k); + if (err) + goto err_out_dequeue; + } + + return err; + +err_out_dequeue: + kevent_storage_dequeue(k->st, k); +err_out_iput: + iput(inode); +err_out_fput: + sockfd_put(sock); +err_out_exit: + return err; +} + +int kevent_socket_dequeue(struct kevent *k) +{ + struct inode *inode = k->st->origin; + struct socket *sock; + + kevent_storage_dequeue(k->st, k); + + sock = SOCKET_I(inode); + iput(inode); + sockfd_put(sock); + + return 0; +} + +void kevent_socket_notify(struct sock *sk, u32 event) +{ + if (sk->sk_socket) + kevent_storage_ready(&SOCK_INODE(sk->sk_socket)->st, NULL, event); +} + +/* + * It is required for network protocols compiled as modules, like IPv6. + */ +EXPORT_SYMBOL_GPL(kevent_socket_notify); + +#ifdef CONFIG_LOCKDEP +static struct lock_class_key kevent_sock_key; + +void kevent_socket_reinit(struct socket *sock) +{ + struct inode *inode = SOCK_INODE(sock); + + lockdep_set_class(&inode->st.lock, &kevent_sock_key); +} + +void kevent_sk_reinit(struct sock *sk) +{ + if (sk->sk_socket) { + struct inode *inode = SOCK_INODE(sk->sk_socket); + + lockdep_set_class(&inode->st.lock, &kevent_sock_key); + } +} +#endif +static int __init kevent_init_socket(void) +{ + struct kevent_callbacks sc = { + .callback = &kevent_socket_callback, + .enqueue = &kevent_socket_enqueue, + .dequeue = &kevent_socket_dequeue, + .flags = 0, + }; + + return kevent_add_callbacks(&sc, KEVENT_SOCKET); +} +module_init(kevent_init_socket); diff --git a/net/core/sock.c b/net/core/sock.c index 0ed5b4f..e687f54 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1393,6 +1393,7 @@ static void sock_def_wakeup(struct sock *sk) if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible_all(sk->sk_sleep); read_unlock(&sk->sk_callback_lock); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); } static void sock_def_error_report(struct sock *sk) @@ -1402,6 +1403,7 @@ static void sock_def_error_report(struct sock *sk) wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk,0,POLL_ERR); read_unlock(&sk->sk_callback_lock); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); } static void sock_def_readable(struct sock *sk, int len) @@ -1411,6 +1413,7 @@ static void sock_def_readable(struct sock *sk, int len) wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk,1,POLL_IN); read_unlock(&sk->sk_callback_lock); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); } static void sock_def_write_space(struct sock *sk) @@ -1430,6 +1433,7 @@ static void sock_def_write_space(struct sock *sk) } read_unlock(&sk->sk_callback_lock); + kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV); } static void sock_def_destruct(struct sock *sk) @@ -1480,6 +1484,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_state = TCP_CLOSE; sk->sk_socket = sock; + kevent_sk_reinit(sk); + sock_set_flag(sk, SOCK_ZAPPED); if(sock) @@ -1546,8 +1552,10 @@ void fastcall release_sock(struct sock *sk) if (sk->sk_backlog.tail) __release_sock(sk); sk->sk_lock.owner = NULL; - if (waitqueue_active(&sk->sk_lock.wq)) + if (waitqueue_active(&sk->sk_lock.wq)) { wake_up(&sk->sk_lock.wq); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); + } spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(release_sock); diff --git a/net/core/stream.c b/net/core/stream.c index d1d7dec..2878c2a 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -36,6 +36,7 @@ void sk_stream_write_space(struct sock *sk) wake_up_interruptible(sk->sk_sleep); if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, 2, POLL_OUT); + kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV); } } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c701f6a..84ce4c5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3127,6 +3127,7 @@ static void tcp_ofo_queue(struct sock *sk) __skb_unlink(skb, &tp->out_of_order_queue); __skb_queue_tail(&sk->sk_receive_queue, skb); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; if(skb->h.th->fin) tcp_fin(skb, sk, skb->h.th); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bf7a224..bca9f2a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -61,6 +61,7 @@ #include <linux/jhash.h> #include <linux/init.h> #include <linux/times.h> +#include <linux/kevent.h> #include <net/icmp.h> #include <net/inet_hashtables.h> @@ -1391,6 +1392,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) reqsk_free(req); } else { inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + kevent_socket_notify(sk, KEVENT_SOCKET_ACCEPT); } return 0; diff --git a/net/socket.c b/net/socket.c index 4e39631..776dc2e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -84,6 +84,7 @@ #include <linux/kmod.h> #include <linux/audit.h> #include <linux/wireless.h> +#include <linux/kevent.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -496,6 +497,8 @@ static struct socket *sock_alloc(void) inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; + kevent_socket_reinit(sock); + get_cpu_var(sockets_in_use)++; put_cpu_var(sockets_in_use); return sock; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2f208c7..835e20f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1563,8 +1563,10 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, struct scm_cookie tmp_scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); + struct sock *other; int noblock = flags & MSG_DONTWAIT; struct sk_buff *skb; + int err; err = -EOPNOTSUPP; @@ -1580,6 +1582,12 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, goto out_unlock; wake_up_interruptible(&u->peer_wait); + other =unix_peer_get(sk); + if (other) { + kevent_socket_notify(other, KEVENT_SOCKET_SEND); + sock_put(other); + } else + kevent_socket_notify(sk, KEVENT_SOCKET_RECV); if (msg->msg_name) unix_copy_addr(msg, skb->sk); @@ -1674,7 +1682,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sock_iocb *siocb = kiocb_to_siocb(iocb); struct scm_cookie tmp_scm; - struct sock *sk = sock->sk; + struct sock *sk = sock->sk, *other; struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr=msg->msg_name; int copied = 0; @@ -1803,6 +1811,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, } } while (size); + other =unix_peer_get(sk); + if (other) { + kevent_socket_notify(other, KEVENT_SOCKET_SEND); + sock_put(other); + } + if (sk->sk_shutdown & RCV_SHUTDOWN || !other) + kevent_socket_notify(sk, KEVENT_SOCKET_RECV); + mutex_unlock(&u->readlock); scm_recv(sock, msg, siocb->scm, flags); out: @@ -1824,6 +1840,10 @@ static int unix_shutdown(struct socket *sock, int mode) sock_hold(other); unix_state_wunlock(sk); sk->sk_state_change(sk); + kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV); + if (other) + kevent_socket_notify(other, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV); + kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV); if (other && (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/