In the STT implementation we saw performance improvements from linearizing
skbs in the SLUB case, so the following patch skips the zero-copy
operation in that case.

Performance numbers for a large-packet TCP test using netperf:

OVS branch     TCP      Host0     Host1
version        Gbps     CPU%      CPU%
-----------------------------------------
2.5            9.4       272       315

master +       9.4       230       285
patch
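
The numbers above are from a standard netperf TCP stream test between the
two hosts; an invocation along these lines reproduces the setup (the exact
options used are not recorded here, so treat this as a sketch):

    # on the receiving host
    netserver

    # on the sending host, stream TCP over the tunnel for 60 seconds
    netperf -t TCP_STREAM -H <receiver-ip> -l 60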

Tested-by: Vasmi Abidi <vab...@vmware.com>
Signed-off-by: Pravin B Shelar <pshe...@ovn.org>
---
v1->v2:
- While reassembling, try to merge skbs before copying them.
- Define a separate coalesce_skb() that copies skbs in the SLUB case.
---
 datapath/linux/compat/stt.c | 250 +++++++++++++++++++++++++++++++-------------
 1 file changed, 176 insertions(+), 74 deletions(-)

diff --git a/datapath/linux/compat/stt.c b/datapath/linux/compat/stt.c
index eb397e8..a1b309a 100644
--- a/datapath/linux/compat/stt.c
+++ b/datapath/linux/compat/stt.c
@@ -49,6 +49,14 @@
 #define STT_DST_PORT 7471
 
 #ifdef OVS_STT
+#ifdef CONFIG_SLUB
+/*
+ * We saw better performance when skipping zero copy in the SLUB case,
+ * so skip zero copy there.
+ */
+#define SKIP_ZERO_COPY
+#endif
+
 #define STT_VER 0
 
 /* @list: Per-net list of STT ports.
@@ -219,73 +227,6 @@ static int clear_gso(struct sk_buff *skb)
        return 0;
 }
 
-static struct sk_buff *normalize_frag_list(struct sk_buff *head,
-                                          struct sk_buff **skbp)
-{
-       struct sk_buff *skb = *skbp;
-       struct sk_buff *last;
-
-       do {
-               struct sk_buff *frags;
-
-               if (skb_shared(skb)) {
-                       struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
-
-                       if (unlikely(!nskb))
-                               return ERR_PTR(-ENOMEM);
-
-                       nskb->next = skb->next;
-                       consume_skb(skb);
-                       skb = nskb;
-                       *skbp = skb;
-               }
-
-               if (head) {
-                       head->len -= skb->len;
-                       head->data_len -= skb->len;
-                       head->truesize -= skb->truesize;
-               }
-
-               frags = skb_shinfo(skb)->frag_list;
-               if (frags) {
-                       int err;
-
-                       err = skb_unclone(skb, GFP_ATOMIC);
-                       if (unlikely(err))
-                               return ERR_PTR(err);
-
-                       last = normalize_frag_list(skb, &frags);
-                       if (IS_ERR(last))
-                               return last;
-
-                       skb_shinfo(skb)->frag_list = NULL;
-                       last->next = skb->next;
-                       skb->next = frags;
-               } else {
-                       last = skb;
-               }
-
-               skbp = &skb->next;
-       } while ((skb = skb->next));
-
-       return last;
-}
-
-/* Takes a linked list of skbs, which potentially contain frag_list
- * (whose members in turn potentially contain frag_lists, etc.) and
- * converts them into a single linear linked list.
- */
-static int straighten_frag_list(struct sk_buff **skbp)
-{
-       struct sk_buff *err_skb;
-
-       err_skb = normalize_frag_list(NULL, skbp);
-       if (IS_ERR(err_skb))
-               return PTR_ERR(err_skb);
-
-       return 0;
-}
-
 static void copy_skb_metadata(struct sk_buff *to, struct sk_buff *from)
 {
        to->protocol = from->protocol;
@@ -465,6 +406,74 @@ static int skb_list_segment(struct sk_buff *head, bool ipv4, int l4_offset)
        return 0;
 }
 
+#ifndef SKIP_ZERO_COPY
+static struct sk_buff *normalize_frag_list(struct sk_buff *head,
+                                          struct sk_buff **skbp)
+{
+       struct sk_buff *skb = *skbp;
+       struct sk_buff *last;
+
+       do {
+               struct sk_buff *frags;
+
+               if (skb_shared(skb)) {
+                       struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+                       if (unlikely(!nskb))
+                               return ERR_PTR(-ENOMEM);
+
+                       nskb->next = skb->next;
+                       consume_skb(skb);
+                       skb = nskb;
+                       *skbp = skb;
+               }
+
+               if (head) {
+                       head->len -= skb->len;
+                       head->data_len -= skb->len;
+                       head->truesize -= skb->truesize;
+               }
+
+               frags = skb_shinfo(skb)->frag_list;
+               if (frags) {
+                       int err;
+
+                       err = skb_unclone(skb, GFP_ATOMIC);
+                       if (unlikely(err))
+                               return ERR_PTR(err);
+
+                       last = normalize_frag_list(skb, &frags);
+                       if (IS_ERR(last))
+                               return last;
+
+                       skb_shinfo(skb)->frag_list = NULL;
+                       last->next = skb->next;
+                       skb->next = frags;
+               } else {
+                       last = skb;
+               }
+
+               skbp = &skb->next;
+       } while ((skb = skb->next));
+
+       return last;
+}
+
+/* Takes a linked list of skbs, which potentially contain frag_list
+ * (whose members in turn potentially contain frag_lists, etc.) and
+ * converts them into a single linear linked list.
+ */
+static int straighten_frag_list(struct sk_buff **skbp)
+{
+       struct sk_buff *err_skb;
+
+       err_skb = normalize_frag_list(NULL, skbp);
+       if (IS_ERR(err_skb))
+               return PTR_ERR(err_skb);
+
+       return 0;
+}
+
 static int coalesce_skb(struct sk_buff **headp)
 {
        struct sk_buff *frag, *head, *prev;
@@ -510,6 +519,32 @@ static int coalesce_skb(struct sk_buff **headp)
        head->next = NULL;
        return 0;
 }
+#else
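+/*
+ * SLUB case: rather than keeping a frag list, linearize the packet by
+ * expanding the head skb to the full reassembled length and copying
+ * each fragment's bytes into it.
+ */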
+static int coalesce_skb(struct sk_buff **headp)
+{
+       struct sk_buff *frag, *head = *headp;
+       int tot_len = FRAG_CB(head)->first.tot_len;
+       int err;
+
+       if (!head->next)
+               return 0;
+
+       err = pskb_expand_head(head, skb_headroom(head), tot_len, GFP_ATOMIC);
+       if (err)
+               return err;
+
+       if (unlikely(!__pskb_pull_tail(head, head->data_len)))
+               BUG();
+
+       for (frag = head->next; frag; frag = frag->next) {
+               skb_copy_bits(frag, 0, skb_put(head, frag->len), frag->len);
+       }
+
+       head->next = NULL;
+       head->truesize = SKB_TRUESIZE(head->len);
+       return 0;
+}
+#endif
 
 static int __try_to_segment(struct sk_buff *skb, bool csum_partial,
                            bool ipv4, bool tcp, int l4_offset)
@@ -522,6 +557,12 @@ static int __try_to_segment(struct sk_buff *skb, bool csum_partial,
 
 static int try_to_segment(struct sk_buff *skb)
 {
+#ifdef SKIP_ZERO_COPY
+       /* Since coalesce_skb() does not generate a frag-list, there is
+        * no need to linearize the skb here.
+        */
+       return 0;
+#else
        struct stthdr *stth = stt_hdr(skb);
        bool csum_partial = !!(stth->flags & STT_CSUM_PARTIAL);
        bool ipv4 = !!(stth->flags & STT_PROTO_IPV4);
@@ -529,16 +570,19 @@ static int try_to_segment(struct sk_buff *skb)
        int l4_offset = stth->l4_offset;
 
        return __try_to_segment(skb, csum_partial, ipv4, tcp, l4_offset);
+#endif
 }
 
 static int segment_skb(struct sk_buff **headp, bool csum_partial,
                       bool ipv4, bool tcp, int l4_offset)
 {
+#ifndef SKIP_ZERO_COPY
        int err;
 
        err = coalesce_skb(headp);
        if (err)
                return err;
+#endif
 
        if (skb_shinfo(*headp)->frag_list)
                return __try_to_segment(*headp, csum_partial,
@@ -1054,16 +1098,61 @@ static struct pkt_frag *lookup_frag(struct net *net,
        return victim_frag;
 }
 
+#ifdef SKIP_ZERO_COPY
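+/*
+ * Append @from's payload to @to. Only applies while @to is still a
+ * single skb at offset zero. First try skb_try_coalesce(), which may
+ * steal @from's head; otherwise expand @to and copy the bytes. On
+ * success, *delta reports the change for memory accounting.
+ */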
+static int __copy_skb(struct sk_buff *to, struct sk_buff *from,
+                     int *delta, bool *headstolen)
+{
+       int err;
+
+       if (to->next)
+               return -EINVAL;
+
+       if (FRAG_CB(to)->offset)
+               return -EINVAL;
+
+       if (unlikely(skb_unclone(to, GFP_ATOMIC)))
+               return -ENOMEM;
+       if (skb_try_coalesce(to, from, headstolen, delta))
+               return 0;
+
+       *headstolen = false;
+       err = pskb_expand_head(to, skb_headroom(to),
+                              to->len + from->len, GFP_ATOMIC);
+       if (err)
+               return err;
+
+       if (unlikely(!__pskb_pull_tail(to, to->data_len)))
+               BUG();
+
+       if (unlikely(to->data_len || (from->len > skb_tailroom(to))))
+               return -EINVAL;
+
+       skb_copy_bits(from, 0, skb_put(to, from->len), from->len);
+
+       *delta = from->len;
+       to->truesize += from->len;
+       return 0;
+}
+#else
+static int __copy_skb(struct sk_buff *to, struct sk_buff *from,
+                     int *delta, bool *headstolen)
+{
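+       /* Zero-copy build: never merge here; callers chain skbs instead. */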
+       *headstolen = false;
+       return -EINVAL;
+}
+#endif
+
 static struct sk_buff *reassemble(struct sk_buff *skb)
 {
        struct iphdr *iph = ip_hdr(skb);
        struct tcphdr *tcph = tcp_hdr(skb);
        u32 seq = ntohl(tcph->seq);
        struct stt_percpu *stt_percpu;
-       struct sk_buff *last_skb;
+       struct sk_buff *last_skb, *copied_skb = NULL;
        struct pkt_frag *frag;
        struct pkt_key key;
-       int tot_len;
+       int tot_len, delta = skb->truesize;
+       bool headstolen;
        u32 hash;
 
        tot_len = seq >> STT_SEQ_LEN_SHIFT;
@@ -1103,7 +1192,6 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
                FRAG_CB(skb)->first.set_ecn_ce = false;
                list_add_tail(&frag->lru_node, &stt_percpu->frag_lru);
                stt_percpu->frag_mem_used += skb->truesize;
-
                skb = NULL;
                goto unlock;
        }
@@ -1114,8 +1202,13 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
        last_skb = FRAG_CB(frag->skbs)->first.last_skb;
        if (likely(FRAG_CB(last_skb)->offset + last_skb->len ==
                   FRAG_CB(skb)->offset)) {
-               last_skb->next = skb;
-               FRAG_CB(frag->skbs)->first.last_skb = skb;
+
+               if (!__copy_skb(frag->skbs, skb, &delta, &headstolen)) {
+                       copied_skb = skb;
+               } else {
+                       last_skb->next = skb;
+                       FRAG_CB(frag->skbs)->first.last_skb = skb;
+               }
        } else {
                struct sk_buff *prev = NULL, *next;
 
@@ -1154,8 +1247,15 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
 
        FRAG_CB(frag->skbs)->first.set_ecn_ce |= INET_ECN_is_ce(iph->tos);
        FRAG_CB(frag->skbs)->first.rcvd_len += skb->len;
-       FRAG_CB(frag->skbs)->first.mem_used += skb->truesize;
-       stt_percpu->frag_mem_used += skb->truesize;
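+       /* If @skb's head was stolen by skb_try_coalesce(), zero out its
+        * accounting so that kfree_skb_partial() below releases only the
+        * skb itself.
+        */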
+       if (copied_skb) {
+               if (headstolen) {
+                       skb->len = 0;
+                       skb->data_len = 0;
+                       skb->truesize -= delta;
+               }
+       }
+       stt_percpu->frag_mem_used += delta;
+       FRAG_CB(frag->skbs)->first.mem_used += delta;
 
        if (FRAG_CB(frag->skbs)->first.tot_len ==
            FRAG_CB(frag->skbs)->first.rcvd_len) {
@@ -1174,6 +1274,8 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
                skb = NULL;
        }
 
+       if (copied_skb)
+               kfree_skb_partial(copied_skb, headstolen);
        goto unlock;
 
 unlock_free:
-- 
2.5.5
