From: Divy Le Ray <[EMAIL PROTECTED]>

Add all-in-software LRO (Large Receive Offload) support: coalesce consecutive in-order TCP segments per queue set before handing them to the stack.

Signed-off-by: Divy Le Ray <[EMAIL PROTECTED]>
---

 drivers/net/cxgb3/adapter.h     |   21 ++
 drivers/net/cxgb3/common.h      |    1 
 drivers/net/cxgb3/cxgb3_ioctl.h |    1 
 drivers/net/cxgb3/cxgb3_main.c  |   16 ++
 drivers/net/cxgb3/sge.c         |  341 ++++++++++++++++++++++++++++++++++++++-
 drivers/net/cxgb3/t3_cpl.h      |   10 +
 6 files changed, 384 insertions(+), 6 deletions(-)

diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index 80c3d8f..576db4a 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -95,6 +95,23 @@ struct sge_fl {                      /* SGE per free-buffer
        unsigned long alloc_failed; /* # of times buffer allocation failed */
 };
 
+/* Max active LRO sessions per queue set */
+#define MAX_LRO_PER_QSET 8
+
+struct sge_lro_session {
+       struct sk_buff *skb;
+       struct sk_buff *skb_last_frag;
+       u32 seq;
+       u16 iplen;
+};
+
+struct sge_lro {
+       unsigned int enabled;
+       unsigned int num_active;
+       struct sge_lro_session *last_s;
+       struct sge_lro_session s[MAX_LRO_PER_QSET];
+};
+
 /*
  * Bundle size for grouping offload RX packets for delivery to the stack.
  * Don't make this too big as we do prefetch on each packet in a bundle.
@@ -164,6 +181,9 @@ enum {                              /* per port SGE statistics */
        SGE_PSTAT_TX_CSUM,      /* # of TX checksum offloads */
        SGE_PSTAT_VLANEX,       /* # of VLAN tag extractions */
        SGE_PSTAT_VLANINS,      /* # of VLAN tag insertions */
+       SGE_PSTATS_LRO_QUEUED,  /* # of LRO appended packets */
+       SGE_PSTATS_LRO_FLUSHED, /* # of LRO flushed packets */
+       SGE_PSTATS_LRO_X_STREAMS,       /* # of exceeded LRO contexts */
 
        SGE_PSTAT_MAX           /* must be last */
 };
@@ -171,6 +191,7 @@ enum {                              /* per port SGE statistics */
 struct sge_qset {              /* an SGE queue set */
        struct sge_rspq rspq;
        struct sge_fl fl[SGE_RXQ_PER_SET];
+       struct sge_lro lro;
        struct sge_txq txq[SGE_TXQ_PER_SET];
        struct net_device *netdev;      /* associated net device */
        unsigned long txq_stopped;      /* which Tx queues are stopped */
diff --git a/drivers/net/cxgb3/common.h b/drivers/net/cxgb3/common.h
index e23deeb..1031ad0 100644
--- a/drivers/net/cxgb3/common.h
+++ b/drivers/net/cxgb3/common.h
@@ -322,6 +322,7 @@ struct tp_params {
 
 struct qset_params {           /* SGE queue set parameters */
        unsigned int polling;   /* polling/interrupt service for rspq */
+       unsigned int lro;       /* large receive offload */
        unsigned int coalesce_usecs;    /* irq coalescing timer */
        unsigned int rspq_size; /* # of entries in response queue */
        unsigned int fl_size;   /* # of entries in regular free list */
diff --git a/drivers/net/cxgb3/cxgb3_ioctl.h b/drivers/net/cxgb3/cxgb3_ioctl.h
index 0a82fcd..68200a1 100644
--- a/drivers/net/cxgb3/cxgb3_ioctl.h
+++ b/drivers/net/cxgb3/cxgb3_ioctl.h
@@ -90,6 +90,7 @@ struct ch_qset_params {
        int32_t fl_size[2];
        int32_t intr_lat;
        int32_t polling;
+       int32_t lro;
        int32_t cong_thres;
 };
 
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index 7ff834e..b78eefb 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -1031,7 +1031,11 @@ static char stats_strings[][ETH_GSTRING_
        "VLANinsertions     ",
        "TxCsumOffload      ",
        "RxCsumGood         ",
-       "RxDrops            "
+       "RxDrops            ",
+
+       "LroQueued          ",
+       "LroFlushed         ",
+       "LroExceededSessions"
 };
 
 static int get_stats_count(struct net_device *dev)
@@ -1145,6 +1149,9 @@ static void get_stats(struct net_device
        *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_TX_CSUM);
        *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_RX_CSUM_GOOD);
        *data++ = s->rx_cong_drops;
+       *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTATS_LRO_QUEUED);
+       *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTATS_LRO_FLUSHED);
+       *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTATS_LRO_X_STREAMS);
 }
 
 static inline void reg_block_dump(struct adapter *ap, void *buf,
@@ -1624,6 +1631,12 @@ static int cxgb_extension_ioctl(struct n
                                }
                        }
                }
+               if (t.lro >= 0) {
+                       struct sge_qset *qs = &adapter->sge.qs[t.qset_idx];
+
+                       q->lro = t.lro;
+                       qs->lro.enabled = t.lro;
+               }
                break;
        }
        case CHELSIO_GET_QSET_PARAMS:{
@@ -1643,6 +1656,7 @@ static int cxgb_extension_ioctl(struct n
                t.fl_size[0] = q->fl_size;
                t.fl_size[1] = q->jumbo_size;
                t.polling = q->polling;
+               t.lro = q->lro;
                t.intr_lat = q->coalesce_usecs;
                t.cong_thres = q->cong_thres;
 
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index c237834..44c4220 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -35,6 +35,7 @@
 #include <linux/if_vlan.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
+#include <net/tcp.h>
 #include <linux/dma-mapping.h>
 #include "common.h"
 #include "regs.h"
@@ -1710,6 +1711,324 @@ static void rx_eth(struct adapter *adap,
                netif_rx(skb);
 }
 
+#define IPH_OFFSET (2 + sizeof (struct cpl_rx_pkt) + ETH_HLEN)
+#define SKB_HASHVAL(skb) (skb->priority)
+#define LRO_SESSION_IDX_HINT(skb) (SKB_HASHVAL(skb) & (MAX_LRO_PER_QSET - 1))
+#define LRO_SESSION_IDX_HINT_HASH(hash) (hash & (MAX_LRO_PER_QSET - 1))
+#define LRO_IDX_INC(idx) idx = (idx + 1) & (MAX_LRO_PER_QSET - 1)
+
+static inline struct sge_lro_session *lro_session(struct sge_lro *l, int idx)
+{
+       return l->s + idx;
+}
+
+static inline int lro_match_session(struct sge_lro_session *s,
+                                   struct iphdr *iph, struct tcphdr *tcph)
+{
+       struct iphdr *s_iph = (struct iphdr *)(s->skb->data + IPH_OFFSET);
+       struct tcphdr *s_tcph = (struct tcphdr *)(s_iph + 1);
+
+       return *(u32 *) & tcph->source == *(u32 *) & s_tcph->source &&
+           iph->saddr == s_iph->saddr && iph->daddr == s_iph->daddr;
+}
+
+static inline struct sge_lro_session *lro_find_session(struct sge_lro *l,
+                                                      int idx,
+                                                      struct iphdr *iph,
+                                                      struct tcphdr *tcph)
+{
+       struct sge_lro_session *s;
+       int active = 0;
+
+       while (active < l->num_active) {
+               s = lro_session(l, idx);
+               if (s->skb) {
+                       if (lro_match_session(s, iph, tcph)) {
+                               l->last_s = s;
+                               return s;
+                       }
+                       active++;
+               }
+               LRO_IDX_INC(idx);
+       }
+
+       return NULL;
+}
+
+static inline void lro_new_session_init(struct sge_lro_session *s,
+                                       struct sk_buff *skb)
+{
+       struct iphdr *ih = (struct iphdr *)(skb->data + IPH_OFFSET);
+       struct tcphdr *th = (struct tcphdr *)(ih + 1);
+       int iplen = ntohs(ih->tot_len);
+
+       s->skb = skb;
+       s->iplen = iplen;
+       s->seq = ntohl(th->seq) + iplen - sizeof(*ih) - (th->doff << 2);
+}
+
+static void lro_flush_session(struct adapter *adap, struct sge_qset *qs,
+                             struct sge_lro_session *s, struct sk_buff *skb)
+{
+       struct sge_lro *l = &qs->lro;
+       struct iphdr *ih = (struct iphdr *)(s->skb->data + IPH_OFFSET);
+
+       ih->tot_len = htons(s->iplen);
+       ih->check = 0;
+       ih->check = ip_fast_csum((unsigned char *)ih, ih->ihl);
+
+       rx_eth(adap, &qs->rspq, s->skb, 2);
+
+       s->skb = skb;
+       if (skb)
+               lro_new_session_init(s, skb);
+       else
+               l->num_active--;
+
+       qs->port_stats[SGE_PSTATS_LRO_FLUSHED]++;
+}
+
+static inline struct sge_lro_session *lro_new_session(struct adapter *adap,
+                                                     struct sge_qset *qs,
+                                                     struct sk_buff *skb)
+{
+       struct sge_lro *l = &qs->lro;
+       int idx = LRO_SESSION_IDX_HINT(skb);
+       struct sge_lro_session *s = lro_session(l, idx);
+
+       if (likely(!s->skb))
+               goto done;
+
+       BUG_ON(l->num_active > MAX_LRO_PER_QSET);
+       if (l->num_active == MAX_LRO_PER_QSET) {
+               lro_flush_session(adap, qs, s, skb);
+               qs->port_stats[SGE_PSTATS_LRO_X_STREAMS]++;
+               return s;
+       }
+
+       while (1) {
+               LRO_IDX_INC(idx);
+               s = lro_session(l, idx);
+               if (!s->skb)
+                       break;
+       }
+
+done:
+       lro_new_session_init(s, skb);
+
+       l->num_active++;
+       return s;
+}
+
+static inline void sge_lro_flush_all(struct adapter *adap, struct sge_qset *qs)
+{
+       struct sge_lro *l = &qs->lro;
+       struct sge_lro_session *s = l->last_s;
+       int active = 0, idx = 0, num_active = l->num_active;
+
+       if (unlikely(!s))
+               s = lro_session(l, idx);
+
+       while (active < num_active) {
+               if (s->skb) {
+                       lro_flush_session(adap, qs, s, NULL);
+                       active++;
+               }
+               LRO_IDX_INC(idx);
+               s = lro_session(l, idx);
+       }
+}
+
+static inline int can_lro_packet(struct cpl_rx_pkt *cpl, unsigned int rss_hi)
+{
+       struct ethhdr *eh = (struct ethhdr *)(cpl + 1);
+       struct iphdr *ih = (struct iphdr *)(eh + 1);
+
+       if (unlikely(G_HASHTYPE(ntohl(rss_hi)) != RSS_HASH_4_TUPLE ||
+                    (*((u8 *) cpl + 1) & 0x90) != 0x10 ||
+                    cpl->csum != 0xffff || eh->h_proto != ntohs(ETH_P_IP) ||
+                    ih->ihl != (sizeof(*ih) >> 2))) {
+               return 0;
+       }
+
+       return 1;
+}
+
+static inline int can_lro_tcpsegment(struct tcphdr *th)
+{
+       int olen = (th->doff << 2) - sizeof(*th);
+       u8 control_bits = *((u8 *) th + 13);
+
+       if (unlikely((control_bits & 0xB7) != 0x10))
+               goto no_lro;
+
+       if (olen) {
+               u32 *ptr = (u32 *) (th + 1);
+               if (unlikely(olen != TCPOLEN_TSTAMP_ALIGNED ||
+                            *ptr != ntohl((TCPOPT_NOP << 24) |
+                                          (TCPOPT_NOP << 16) |
+                                          (TCPOPT_TIMESTAMP << 8) |
+                                          TCPOLEN_TIMESTAMP)))
+                       goto no_lro;
+       }
+
+       return 1;
+
+no_lro:
+       return 0;
+}
+
+static inline int lro_update_session(struct sge_lro_session *s,
+                                    unsigned char *va,
+                                    struct skb_frag_struct *frag,
+                                    struct sk_buff *skb)
+{
+       struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(s->skb->data + 2);
+       struct cpl_rx_pkt *ncpl = (struct cpl_rx_pkt *)(va + 2);
+       struct iphdr *nih = (struct iphdr *)(va + IPH_OFFSET);
+       struct tcphdr *th, *nth = (struct tcphdr *)(nih + 1);
+       u32 seq = ntohl(nth->seq);
+       int plen, tcpiphlen, olen = (nth->doff << 2) - sizeof(*nth);
+
+       if (cpl->vlan_valid && cpl->vlan != ncpl->vlan)
+               return -1;
+
+       if (unlikely(seq != s->seq))
+               return -1;
+
+       th = (struct tcphdr *)(s->skb->data + IPH_OFFSET +
+                              sizeof(struct iphdr));
+
+       if (olen) {
+               u32 *ptr = (u32 *) (th + 1), *nptr = (u32 *) (nth + 1);
+
+               if (unlikely(ntohl(*(ptr + 1)) > ntohl(*(nptr + 1)) ||
+                            !*(nptr + 2)))
+                       return -1;
+
+               *(ptr + 1) = *(nptr + 1);
+               *(ptr + 2) = *(nptr + 2);
+       }
+       th->ack_seq = nth->ack_seq;
+       th->window = nth->window;
+
+       tcpiphlen = (nth->doff << 2) + sizeof(*nih);
+       plen = ntohs(nih->tot_len) - tcpiphlen;
+       s->seq += plen;
+       s->iplen += plen;
+       s->skb->data_len += plen;
+       s->skb->len += plen;
+       s->skb->truesize += plen;
+
+       if (plen > skb_shinfo(s->skb)->gso_size)
+               skb_shinfo(s->skb)->gso_size = plen;
+
+       if (unlikely(skb)) {
+               skb_pull(skb, skb->len - plen);
+               if (unlikely(!skb_shinfo(s->skb)->frag_list))
+                       skb_shinfo(s->skb)->frag_list = skb;
+               else
+                       s->skb_last_frag->next = skb;
+               s->skb_last_frag = skb;
+       } else {
+               int nr = skb_shinfo(s->skb)->nr_frags;
+               skb_shinfo(s->skb)->frags[nr].page = frag->page;
+               skb_shinfo(s->skb)->frags[nr].page_offset =
+                   frag->page_offset + IPH_OFFSET + tcpiphlen;
+               skb_shinfo(s->skb)->frags[nr].size = plen;
+               skb_shinfo(s->skb)->nr_frags = ++nr;
+       }
+
+       return 0;
+}
+
+static inline int rx_eth_lro_page(struct adapter *adap, struct sge_qset *qs,
+                                 struct sge_fl_page *p, u32 hash, u32 csum,
+                                 int *lro)
+{
+       struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(p->va + 2);
+       struct iphdr *ih;
+       struct tcphdr *th;
+       struct sge_lro_session *s = NULL;
+
+       if (!can_lro_packet(cpl, csum)) {
+               *lro = 0;
+               goto no_lro;
+       }
+
+       ih = (struct iphdr *)(p->va + IPH_OFFSET);
+       th = (struct tcphdr *)(ih + 1);
+       s = lro_find_session(&qs->lro, LRO_SESSION_IDX_HINT_HASH(hash), ih, th);
+       if (unlikely(!s))
+               goto no_lro;
+
+       /* If we already started LRO via chaining skbs, keep doing it that way.
+        */
+       if (unlikely(skb_shinfo(s->skb)->frag_list))
+               return -1;
+
+       if (unlikely(!can_lro_tcpsegment(th)))
+               goto no_lro;
+
+       if (lro_update_session(s, p->va, &p->frag, NULL))
+               goto no_lro;
+
+       if (unlikely(skb_shinfo(s->skb)->nr_frags == MAX_SKB_FRAGS ||
+                    s->skb->len + qs->netdev->mtu > 65535))
+               lro_flush_session(adap, qs, s, NULL);
+
+       qs->port_stats[SGE_PSTATS_LRO_QUEUED]++;
+
+       return 0;
+
+no_lro:
+       if (s)
+               lro_flush_session(adap, qs, s, NULL);
+
+       return -1;
+}
+
+static void rx_eth_lro_skb(struct adapter *adap, struct sge_rspq *rq,
+                          struct sk_buff *skb, int ethpad)
+{
+       struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(skb->data + ethpad);
+       struct sge_qset *qs = rspq_to_qset(rq);
+       struct iphdr *ih;
+       struct tcphdr *th;
+       struct sge_lro_session *s = NULL;
+
+       if (!can_lro_packet(cpl, skb->csum))
+               goto no_lro;
+
+       ih = (struct iphdr *)(skb->data + IPH_OFFSET);
+       th = (struct tcphdr *)(ih + 1);
+       s = lro_find_session(&qs->lro,
+                            LRO_SESSION_IDX_HINT_HASH(skb->priority), ih, th);
+
+       if (unlikely(!can_lro_tcpsegment(th)))
+               goto no_lro;
+       else if (unlikely(!s))
+               s = lro_new_session(adap, qs, skb);
+       else {
+               if (lro_update_session(s, skb->data, NULL, skb)) {
+                       lro_flush_session(adap, qs, s, skb);
+                       return;
+               }
+
+               if (unlikely(s->skb->len + qs->netdev->mtu > 65535))
+                       lro_flush_session(adap, qs, s, NULL);
+       }
+
+       qs->port_stats[SGE_PSTATS_LRO_QUEUED]++;
+       return;
+
+no_lro:
+       if (s)
+               lro_flush_session(adap, qs, s, NULL);
+
+       rx_eth(adap, rq, skb, ethpad);
+}
+
 #define SKB_DATA_SIZE 128
 
 static void skb_data_init(struct sk_buff *skb, struct sge_fl_page *p,
@@ -1911,7 +2230,7 @@ static int process_responses(struct adap
        q->next_holdoff = q->holdoff_tmr;
 
        while (likely(budget_left && is_new_response(r, q))) {
-               int eth, ethpad = 2;
+               int eth, ethpad = 2, lro = qs->lro.enabled;
                struct sk_buff *skb = NULL;
                u32 len, flags = ntohl(r->flags);
                u32 rss_hi = *(const u32 *)r, rss_lo = r->rss_hdr.rss_hash_val;
@@ -1961,6 +2280,13 @@ static int process_responses(struct adap
                                        if (unlikely(fl->credits <
                                                     SGE_RX_DROP_THRES))
                                                goto eth_recycle;
+
+                                       if (likely(lro &&
+                                                  !rx_eth_lro_page(adap, qs,
+                                                                   p, rss_lo,
+                                                                   rss_hi,
+                                                                   &lro)))
+                                               goto eth_done;
 
                                        skb = alloc_skb(SKB_DATA_SIZE,
                                                        GFP_ATOMIC);
@@ -2016,9 +2342,12 @@ eth_done:
                        skb->csum = rss_hi;
                        skb->priority = rss_lo;
 
-                       if (eth)
-                               rx_eth(adap, q, skb, ethpad);
-                       else {
+                       if (eth) {
+                               if (likely(lro))
+                                       rx_eth_lro_skb(adap, q, skb, ethpad);
+                               else
+                                       rx_eth(adap, q, skb, ethpad);
+                       } else {
                                if (unlikely(r->rss_hdr.opcode ==
                                             CPL_TRACE_PKT))
                                        __skb_pull(skb, ethpad);
@@ -2030,7 +2359,7 @@ eth_done:
                }
                --budget_left;
        }
-
+       sge_lro_flush_all(adap, qs);
        deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
        if (sleeping)
                check_ring_db(adap, qs, sleeping);
@@ -2698,6 +3027,7 @@ int t3_sge_alloc_qset(struct adapter *ad
        spin_unlock(&adapter->sge.reg_lock);
        q->netdev = netdev;
        t3_update_qset_coalesce(q, p);
+       q->lro.enabled = p->lro;
 
        /*
         * We use atalk_ptr as a backpointer to a qset.  In case a device is
@@ -2839,6 +3169,7 @@ void __devinit t3_sge_prep(struct adapte
 
                q->polling = adap->params.rev > 0;
                q->coalesce_usecs = 5;
+               q->lro = 1;
                q->rspq_size = 1024;
                q->fl_size = 1024;
                q->jumbo_size = 512;
diff --git a/drivers/net/cxgb3/t3_cpl.h b/drivers/net/cxgb3/t3_cpl.h
index b7a1a31..0f9f67d 100644
--- a/drivers/net/cxgb3/t3_cpl.h
+++ b/drivers/net/cxgb3/t3_cpl.h
@@ -174,6 +174,12 @@ enum {                             /* TCP congestion control algo
        CONG_ALG_HIGHSPEED
 };
 
+enum {                         /* RSS hash type */
+       RSS_HASH_NONE = 0,
+       RSS_HASH_2_TUPLE = 1 << 0,
+       RSS_HASH_4_TUPLE = 1 << 1
+};
+
 union opcode_tid {
        __be32 opcode_tid;
        __u8 opcode;
@@ -184,6 +190,10 @@ union opcode_tid {
 #define G_OPCODE(x) (((x) >> S_OPCODE) & 0xFF)
 #define G_TID(x)    ((x) & 0xFFFFFF)
 
+#define S_HASHTYPE 22
+#define M_HASHTYPE 0x3
+#define G_HASHTYPE(x) (((x) >> S_HASHTYPE) & M_HASHTYPE)
+
 /* tid is assumed to be 24-bits */
 #define MK_OPCODE_TID(opcode, tid) (V_OPCODE(opcode) | (tid))
 
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to