Socket notifications.

This patch includes socket send/recv/accept notifications.
Using trivial web server based on kevent and this features
instead of epoll it's performance increased more than noticebly.
More details about various benchmarks and server itself 
(evserver_kevent.c) can be found on project's homepage.

Signed-off-by: Evgeniy Polyakov <[EMAIL PROTECTED]>

diff --git a/fs/inode.c b/fs/inode.c
index bf21dc6..82817b1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -21,6 +21,7 @@
 #include <linux/cdev.h>
 #include <linux/bootmem.h>
 #include <linux/inotify.h>
+#include <linux/kevent.h>
 #include <linux/mount.h>
 
 /*
@@ -164,12 +165,18 @@ static struct inode *alloc_inode(struct super_block *sb)
                }
                inode->i_private = NULL;
                inode->i_mapping = mapping;
+#if defined CONFIG_KEVENT_SOCKET || defined CONFIG_KEVENT_PIPE
+               kevent_storage_init(inode, &inode->st);
+#endif
        }
        return inode;
 }
 
 void destroy_inode(struct inode *inode) 
 {
+#if defined CONFIG_KEVENT_SOCKET || defined CONFIG_KEVENT_PIPE
+       kevent_storage_fini(&inode->st);
+#endif
        BUG_ON(inode_has_buffers(inode));
        security_inode_free(inode);
        if (inode->i_sb->s_op->destroy_inode)
diff --git a/include/net/sock.h b/include/net/sock.h
index 03684e7..d840399 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -49,6 +49,7 @@
 #include <linux/skbuff.h>      /* struct sk_buff */
 #include <linux/mm.h>
 #include <linux/security.h>
+#include <linux/kevent.h>
 
 #include <linux/filter.h>
 
@@ -451,6 +452,21 @@ static inline int sk_stream_memory_free(struct sock *sk)
 
 extern void sk_stream_rfree(struct sk_buff *skb);
 
+struct socket_alloc {
+       struct socket socket;
+       struct inode vfs_inode;
+};
+
+static inline struct socket *SOCKET_I(struct inode *inode)
+{
+       return &container_of(inode, struct socket_alloc, vfs_inode)->socket;
+}
+
+static inline struct inode *SOCK_INODE(struct socket *socket)
+{
+       return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
+}
+
 static inline void sk_stream_set_owner_r(struct sk_buff *skb, struct sock *sk)
 {
        skb->sk = sk;
@@ -478,6 +494,7 @@ static inline void sk_add_backlog(struct sock *sk, struct 
sk_buff *skb)
                sk->sk_backlog.tail = skb;
        }
        skb->next = NULL;
+       kevent_socket_notify(sk, KEVENT_SOCKET_RECV);
 }
 
 #define sk_wait_event(__sk, __timeo, __condition)              \
@@ -679,21 +696,6 @@ static inline struct kiocb *siocb_to_kiocb(struct 
sock_iocb *si)
        return si->kiocb;
 }
 
-struct socket_alloc {
-       struct socket socket;
-       struct inode vfs_inode;
-};
-
-static inline struct socket *SOCKET_I(struct inode *inode)
-{
-       return &container_of(inode, struct socket_alloc, vfs_inode)->socket;
-}
-
-static inline struct inode *SOCK_INODE(struct socket *socket)
-{
-       return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
-}
-
 extern void __sk_stream_mem_reclaim(struct sock *sk);
 extern int sk_stream_mem_schedule(struct sock *sk, int size, int kind);
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b7d8317..2763b30 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -864,6 +864,7 @@ static inline int tcp_prequeue(struct sock *sk, struct 
sk_buff *skb)
                        tp->ucopy.memory = 0;
                } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
                        wake_up_interruptible(sk->sk_sleep);
+                       kevent_socket_notify(sk, 
KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND);
                        if (!inet_csk_ack_scheduled(sk))
                                inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
                                                          (3 * TCP_RTO_MIN) / 4,
diff --git a/kernel/kevent/kevent_socket.c b/kernel/kevent/kevent_socket.c
new file mode 100644
index 0000000..d1a2701
--- /dev/null
+++ b/kernel/kevent/kevent_socket.c
@@ -0,0 +1,144 @@
+/*
+ *     kevent_socket.c
+ * 
+ * 2006 Copyright (c) Evgeniy Polyakov <[EMAIL PROTECTED]>
+ * All rights reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/file.h>
+#include <linux/tcp.h>
+#include <linux/kevent.h>
+
+#include <net/sock.h>
+#include <net/request_sock.h>
+#include <net/inet_connection_sock.h>
+
+static int kevent_socket_callback(struct kevent *k)
+{
+       struct inode *inode = k->st->origin;
+       unsigned int events = SOCKET_I(inode)->ops->poll(SOCKET_I(inode)->file, 
SOCKET_I(inode), NULL);
+
+       if ((events & (POLLIN | POLLRDNORM)) && (k->event.event & 
(KEVENT_SOCKET_RECV | KEVENT_SOCKET_ACCEPT)))
+               return 1;
+       if ((events & (POLLOUT | POLLWRNORM)) && (k->event.event & 
KEVENT_SOCKET_SEND))
+               return 1;
+       if (events & (POLLERR | POLLHUP | POLLRDHUP | POLLREMOVE))
+               return -1;
+       return 0;
+}
+
+int kevent_socket_enqueue(struct kevent *k)
+{
+       struct inode *inode;
+       struct socket *sock;
+       int err = -EBADF;
+
+       sock = sockfd_lookup(k->event.id.raw[0], &err);
+       if (!sock)
+               goto err_out_exit;
+
+       inode = igrab(SOCK_INODE(sock));
+       if (!inode)
+               goto err_out_fput;
+
+       err = kevent_storage_enqueue(&inode->st, k);
+       if (err)
+               goto err_out_iput;
+
+       if (k->event.req_flags & KEVENT_REQ_ALWAYS_QUEUE) {
+               kevent_requeue(k);
+               err = 0;
+       } else {
+               err = k->callbacks.callback(k);
+               if (err)
+                       goto err_out_dequeue;
+       }
+
+       return err;
+
+err_out_dequeue:
+       kevent_storage_dequeue(k->st, k);
+err_out_iput:
+       iput(inode);
+err_out_fput:
+       sockfd_put(sock);
+err_out_exit:
+       return err;
+}
+
+int kevent_socket_dequeue(struct kevent *k)
+{
+       struct inode *inode = k->st->origin;
+       struct socket *sock;
+
+       kevent_storage_dequeue(k->st, k);
+
+       sock = SOCKET_I(inode);
+       iput(inode);
+       sockfd_put(sock);
+
+       return 0;
+}
+
+void kevent_socket_notify(struct sock *sk, u32 event)
+{
+       if (sk->sk_socket)
+               kevent_storage_ready(&SOCK_INODE(sk->sk_socket)->st, NULL, 
event);
+}
+
+/*
+ * It is required for network protocols compiled as modules, like IPv6.
+ */
+EXPORT_SYMBOL_GPL(kevent_socket_notify);
+
+#ifdef CONFIG_LOCKDEP
+static struct lock_class_key kevent_sock_key;
+
+void kevent_socket_reinit(struct socket *sock)
+{
+       struct inode *inode = SOCK_INODE(sock);
+
+       lockdep_set_class(&inode->st.lock, &kevent_sock_key);
+}
+
+void kevent_sk_reinit(struct sock *sk)
+{
+       if (sk->sk_socket) {
+               struct inode *inode = SOCK_INODE(sk->sk_socket);
+
+               lockdep_set_class(&inode->st.lock, &kevent_sock_key);
+       }
+}
+#endif
+static int __init kevent_init_socket(void)
+{
+       struct kevent_callbacks sc = {
+               .callback = &kevent_socket_callback,
+               .enqueue = &kevent_socket_enqueue,
+               .dequeue = &kevent_socket_dequeue,
+               .flags = 0,
+       };
+
+       return kevent_add_callbacks(&sc, KEVENT_SOCKET);
+}
+module_init(kevent_init_socket);
diff --git a/net/core/sock.c b/net/core/sock.c
index 0ed5b4f..e687f54 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1393,6 +1393,7 @@ static void sock_def_wakeup(struct sock *sk)
        if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
                wake_up_interruptible_all(sk->sk_sleep);
        read_unlock(&sk->sk_callback_lock);
+       kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND);
 }
 
 static void sock_def_error_report(struct sock *sk)
@@ -1402,6 +1403,7 @@ static void sock_def_error_report(struct sock *sk)
                wake_up_interruptible(sk->sk_sleep);
        sk_wake_async(sk,0,POLL_ERR); 
        read_unlock(&sk->sk_callback_lock);
+       kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND);
 }
 
 static void sock_def_readable(struct sock *sk, int len)
@@ -1411,6 +1413,7 @@ static void sock_def_readable(struct sock *sk, int len)
                wake_up_interruptible(sk->sk_sleep);
        sk_wake_async(sk,1,POLL_IN);
        read_unlock(&sk->sk_callback_lock);
+       kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND);
 }
 
 static void sock_def_write_space(struct sock *sk)
@@ -1430,6 +1433,7 @@ static void sock_def_write_space(struct sock *sk)
        }
 
        read_unlock(&sk->sk_callback_lock);
+       kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV);
 }
 
 static void sock_def_destruct(struct sock *sk)
@@ -1480,6 +1484,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
        sk->sk_state            =       TCP_CLOSE;
        sk->sk_socket           =       sock;
 
+       kevent_sk_reinit(sk);
+
        sock_set_flag(sk, SOCK_ZAPPED);
 
        if(sock)
@@ -1546,8 +1552,10 @@ void fastcall release_sock(struct sock *sk)
        if (sk->sk_backlog.tail)
                __release_sock(sk);
        sk->sk_lock.owner = NULL;
-       if (waitqueue_active(&sk->sk_lock.wq))
+       if (waitqueue_active(&sk->sk_lock.wq)) {
                wake_up(&sk->sk_lock.wq);
+               kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND);
+       }
        spin_unlock_bh(&sk->sk_lock.slock);
 }
 EXPORT_SYMBOL(release_sock);
diff --git a/net/core/stream.c b/net/core/stream.c
index d1d7dec..2878c2a 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -36,6 +36,7 @@ void sk_stream_write_space(struct sock *sk)
                        wake_up_interruptible(sk->sk_sleep);
                if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
                        sock_wake_async(sock, 2, POLL_OUT);
+               kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV);
        }
 }
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c701f6a..84ce4c5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3127,6 +3127,7 @@ static void tcp_ofo_queue(struct sock *sk)
 
                __skb_unlink(skb, &tp->out_of_order_queue);
                __skb_queue_tail(&sk->sk_receive_queue, skb);
+               kevent_socket_notify(sk, KEVENT_SOCKET_RECV);
                tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
                if(skb->h.th->fin)
                        tcp_fin(skb, sk, skb->h.th);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index bf7a224..bca9f2a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -61,6 +61,7 @@
 #include <linux/jhash.h>
 #include <linux/init.h>
 #include <linux/times.h>
+#include <linux/kevent.h>
 
 #include <net/icmp.h>
 #include <net/inet_hashtables.h>
@@ -1391,6 +1392,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff 
*skb)
                reqsk_free(req);
        } else {
                inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+               kevent_socket_notify(sk, KEVENT_SOCKET_ACCEPT);
        }
        return 0;
 
diff --git a/net/socket.c b/net/socket.c
index 4e39631..776dc2e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -84,6 +84,7 @@
 #include <linux/kmod.h>
 #include <linux/audit.h>
 #include <linux/wireless.h>
+#include <linux/kevent.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -496,6 +497,8 @@ static struct socket *sock_alloc(void)
        inode->i_uid = current->fsuid;
        inode->i_gid = current->fsgid;
 
+       kevent_socket_reinit(sock);
+
        get_cpu_var(sockets_in_use)++;
        put_cpu_var(sockets_in_use);
        return sock;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2f208c7..835e20f 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1563,8 +1563,10 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct 
socket *sock,
        struct scm_cookie tmp_scm;
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);
+       struct sock *other;
        int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
+
        int err;
 
        err = -EOPNOTSUPP;
@@ -1580,6 +1582,12 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct 
socket *sock,
                goto out_unlock;
 
        wake_up_interruptible(&u->peer_wait);
+       other =unix_peer_get(sk);
+       if (other) {
+               kevent_socket_notify(other, KEVENT_SOCKET_SEND);
+               sock_put(other);
+       } else
+               kevent_socket_notify(sk, KEVENT_SOCKET_RECV);
 
        if (msg->msg_name)
                unix_copy_addr(msg, skb->sk);
@@ -1674,7 +1682,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct 
socket *sock,
 {
        struct sock_iocb *siocb = kiocb_to_siocb(iocb);
        struct scm_cookie tmp_scm;
-       struct sock *sk = sock->sk;
+       struct sock *sk = sock->sk, *other;
        struct unix_sock *u = unix_sk(sk);
        struct sockaddr_un *sunaddr=msg->msg_name;
        int copied = 0;
@@ -1803,6 +1811,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, 
struct socket *sock,
                }
        } while (size);
 
+       other =unix_peer_get(sk);
+       if (other) {
+               kevent_socket_notify(other, KEVENT_SOCKET_SEND);
+               sock_put(other);
+       }
+       if (sk->sk_shutdown & RCV_SHUTDOWN || !other)
+               kevent_socket_notify(sk, KEVENT_SOCKET_RECV);
+
        mutex_unlock(&u->readlock);
        scm_recv(sock, msg, siocb->scm, flags);
 out:
@@ -1824,6 +1840,10 @@ static int unix_shutdown(struct socket *sock, int mode)
                        sock_hold(other);
                unix_state_wunlock(sk);
                sk->sk_state_change(sk);
+               kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV);
+               if (other)
+                       kevent_socket_notify(other, 
KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV);
+               kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV);
 
                if (other &&
                        (sk->sk_type == SOCK_STREAM || sk->sk_type == 
SOCK_SEQPACKET)) {

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to