The RDS module sits on top of TCP (rds_tcp) and IB (rds_rdma), so messages
arrive in the form of skbs (over TCP) and scatterlists (over IB/RDMA).
However, because socket filters only deal with skbs (i.e. struct sk_buff
as the bpf context), we can only use a socket filter for rds_tcp and not
for rds_rdma.

To provide a single filtering solution for RDS, note that the common
denominator between sk_buff and scatterlist is the scatterlist. Therefore,
this patch converts the skb to an sgvec and invokes sg_filter_run for
rds_tcp, and simply invokes sg_filter_run for IB/rds_rdma.

Signed-off-by: Tushar Dave <tushar.n.d...@oracle.com>
Reviewed-by: Sowmini Varadhan <sowmini.varad...@oracle.com>
---
 net/rds/ib.c       |  1 +
 net/rds/ib.h       |  1 +
 net/rds/ib_recv.c  | 12 ++++++++++++
 net/rds/rds.h      |  2 ++
 net/rds/recv.c     | 17 +++++++++++++++++
 net/rds/tcp.c      |  2 ++
 net/rds/tcp.h      |  2 ++
 net/rds/tcp_recv.c | 38 ++++++++++++++++++++++++++++++++++++++
 8 files changed, 75 insertions(+)

diff --git a/net/rds/ib.c b/net/rds/ib.c
index 89c6333..6ba1f75 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -532,6 +532,7 @@ struct rds_transport rds_ib_transport = {
        .conn_path_shutdown     = rds_ib_conn_path_shutdown,
        .inc_copy_to_user       = rds_ib_inc_copy_to_user,
        .inc_free               = rds_ib_inc_free,
+       .inc_to_sg_get          = rds_ib_inc_to_sg_get,
        .cm_initiate_connect    = rds_ib_cm_initiate_connect,
        .cm_handle_connect      = rds_ib_cm_handle_connect,
        .cm_connect_complete    = rds_ib_cm_connect_complete,
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 73427ff..0a12b41 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -404,6 +404,7 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev,
 void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
 void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp);
 void rds_ib_inc_free(struct rds_incoming *inc);
+int rds_ib_inc_to_sg_get(struct rds_incoming *inc, struct scatterlist **sg);
 int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
 void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc,
                             struct rds_ib_ack_state *state);
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index d300186..2f76a91 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -219,6 +219,18 @@ void rds_ib_inc_free(struct rds_incoming *inc)
        rds_ib_recv_cache_put(&ibinc->ii_cache_entry, &ic->i_cache_incs);
 }
 
+/* Hand back, via @sg, the scatterlist of the first fragment queued on @inc. */
+int rds_ib_inc_to_sg_get(struct rds_incoming *inc, struct scatterlist **sg)
+{
+       struct rds_ib_incoming *ibinc =
+               container_of(inc, struct rds_ib_incoming, ii_inc);
+       struct rds_page_frag *pf;
+
+       pf = list_first_entry(&ibinc->ii_frags, struct rds_page_frag, f_item);
+       *sg = &pf->f_sg;
+       return 0;
+}
+
 static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
                                  struct rds_ib_recv_work *recv)
 {
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c4dcf65..abcd5ce 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -542,6 +542,8 @@ struct rds_transport {
        int (*recv_path)(struct rds_conn_path *cp);
        int (*inc_copy_to_user)(struct rds_incoming *inc, struct iov_iter *to);
        void (*inc_free)(struct rds_incoming *inc);
+       int (*inc_to_sg_get)(struct rds_incoming *inc, struct scatterlist **sg);
+       void (*inc_to_sg_put)(struct scatterlist **sg);
 
        int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
                                 struct rdma_cm_event *event, bool isv6);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 504cd6b..261904c 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -292,6 +292,8 @@ void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
        struct sock *sk;
        unsigned long flags;
        struct rds_conn_path *cp;
+       struct sk_filter *filter;
+       int result = __SOCKSG_PASS;
 
        inc->i_conn = conn;
        inc->i_rx_jiffies = jiffies;
@@ -376,6 +378,21 @@ void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
        /* We can be racing with rds_release() which marks the socket dead. */
        sk = rds_rs_to_sk(rs);
 
+       rcu_read_lock();
+       filter = rcu_dereference(sk->sk_filter);
+       if (filter) {
+               if (conn->c_trans->inc_to_sg_get) {
+                       struct scatterlist *sg;
+
+                       if (conn->c_trans->inc_to_sg_get(inc, &sg) == 0) {
+                               result = sg_filter_run(sk, sg);
+                               if (conn->c_trans->inc_to_sg_put)
+                                       conn->c_trans->inc_to_sg_put(&sg);
+                       }
+               }
+       }
+       rcu_read_unlock();
+
        /* serialize with rds_release -> sock_orphan */
        write_lock_irqsave(&rs->rs_recv_lock, flags);
        if (!sock_flag(sk, SOCK_DEAD)) {
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 2c7b7c3..35454c7 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -465,6 +465,8 @@ struct rds_transport rds_tcp_transport = {
        .conn_path_shutdown     = rds_tcp_conn_path_shutdown,
        .inc_copy_to_user       = rds_tcp_inc_copy_to_user,
        .inc_free               = rds_tcp_inc_free,
+       .inc_to_sg_get          = rds_tcp_inc_to_sg_get,
+       .inc_to_sg_put          = rds_tcp_inc_to_sg_put,
        .stats_info_copy        = rds_tcp_stats_info_copy,
        .exit                   = rds_tcp_exit,
        .t_owner                = THIS_MODULE,
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 3c69361..b2cc910 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -82,6 +82,8 @@ void rds_tcp_restore_callbacks(struct socket *sock,
 int rds_tcp_recv_path(struct rds_conn_path *cp);
 void rds_tcp_inc_free(struct rds_incoming *inc);
 int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
+int rds_tcp_inc_to_sg_get(struct rds_incoming *inc, struct scatterlist **sg);
+void rds_tcp_inc_to_sg_put(struct scatterlist **sg);
 
 /* tcp_send.c */
 void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp);
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index 42c5ff1..b45e69b 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -56,6 +56,44 @@ void rds_tcp_inc_free(struct rds_incoming *inc)
        kmem_cache_free(rds_tcp_incoming_slab, tinc);
 }
 
+#define MAX_SG MAX_SKB_FRAGS
+/* Map the skbs queued on @inc into a new scatterlist of at most MAX_SG
+ * entries, returned via @sg.  Returns 0 on success or a negative errno.
+ */
+int rds_tcp_inc_to_sg_get(struct rds_incoming *inc, struct scatterlist **sg)
+{
+       struct rds_tcp_incoming *tinc;
+       struct scatterlist *sg_list;
+       struct sk_buff *skb;
+       int num_sg = 0, ret = 0;
+
+       tinc = container_of(inc, struct rds_tcp_incoming, ti_inc);
+       /* rds_recv_incoming() calls us under rcu_read_lock(): do not sleep. */
+       sg_list = kmalloc_array(MAX_SG, sizeof(*sg_list), GFP_ATOMIC);
+       if (!sg_list)
+               return -ENOMEM;
+       sg_init_table(sg_list, MAX_SG);
+       skb_queue_walk(&tinc->ti_skb_list, skb) {
+               ret = num_sg >= MAX_SG ? -EMSGSIZE :
+                     skb_to_sgvec_nomark(skb, &sg_list[num_sg], 0, skb->len);
+               if (ret < 0)
+                       break;
+               num_sg += ret;
+       }
+       if (ret < 0 || !num_sg) {       /* mapping failed or nothing mapped */
+               kfree(sg_list);
+               return ret < 0 ? ret : -ENODATA;
+       }
+       sg_mark_end(&sg_list[num_sg - 1]);
+       *sg = sg_list;
+       return 0;
+}
+
+void rds_tcp_inc_to_sg_put(struct scatterlist **sg)
+{
+       kfree(*sg);     /* the table allocated by rds_tcp_inc_to_sg_get() */
+}
+
 /*
  * this is pretty lame, but, whatever.
  */
-- 
1.8.3.1

Reply via email to