Reduce host CPU overhead of Tx packet processing: * Use local variables inside per packet loop instead of fields in structs. * Factor book keeping and conditionals out of the per packet loop where possible. * Post buffers to the nic at a maximum of every 64 packets
Signed-off-by: Nelson Escobar <neescoba at cisco.com> Signed-off-by: John Daley <johndale at cisco.com> --- drivers/net/enic/base/vnic_wq.h | 1 + drivers/net/enic/enic_res.h | 2 +- drivers/net/enic/enic_rxtx.c | 167 +++++++++++++++++++--------------------- 3 files changed, 83 insertions(+), 87 deletions(-) diff --git a/drivers/net/enic/base/vnic_wq.h b/drivers/net/enic/base/vnic_wq.h index 689b81c..7a66813 100644 --- a/drivers/net/enic/base/vnic_wq.h +++ b/drivers/net/enic/base/vnic_wq.h @@ -67,6 +67,7 @@ struct vnic_wq_ctrl { /* 16 bytes */ struct vnic_wq_buf { + struct rte_mempool *pool; void *mb; }; diff --git a/drivers/net/enic/enic_res.h b/drivers/net/enic/enic_res.h index 955db71..3c8e303 100644 --- a/drivers/net/enic/enic_res.h +++ b/drivers/net/enic/enic_res.h @@ -53,7 +53,7 @@ #define ENIC_NON_TSO_MAX_DESC 16 #define ENIC_DEFAULT_RX_FREE_THRESH 32 -#define ENIC_TX_POST_THRESH (ENIC_MIN_WQ_DESCS / 2) +#define ENIC_TX_XMIT_MAX 64 #define ENIC_SETTING(enic, f) ((enic->config.flags & VENETF_##f) ? 1 : 0) diff --git a/drivers/net/enic/enic_rxtx.c b/drivers/net/enic/enic_rxtx.c index ec8d90a..ba15670 100644 --- a/drivers/net/enic/enic_rxtx.c +++ b/drivers/net/enic/enic_rxtx.c @@ -374,114 +374,109 @@ unsigned int enic_cleanup_wq(__rte_unused struct enic *enic, struct vnic_wq *wq) return 0; } -void enic_post_wq_index(struct vnic_wq *wq) -{ - enic_vnic_post_wq_index(wq); -} - -void enic_send_pkt(struct enic *enic, struct vnic_wq *wq, - struct rte_mbuf *tx_pkt, unsigned short len, - uint8_t sop, uint8_t eop, uint8_t cq_entry, - uint16_t ol_flags, uint16_t vlan_tag) -{ - struct wq_enet_desc *desc, *descs; - uint16_t mss = 0; - uint8_t vlan_tag_insert = 0; - uint64_t bus_addr = (dma_addr_t) - (tx_pkt->buf_physaddr + tx_pkt->data_off); - - descs = (struct wq_enet_desc *)wq->ring.descs; - desc = descs + wq->head_idx; - - if (sop) { - if (ol_flags & PKT_TX_VLAN_PKT) - vlan_tag_insert = 1; - - if (enic->hw_ip_checksum) { - if (ol_flags & PKT_TX_IP_CKSUM) - mss |= ENIC_CALC_IP_CKSUM; - - if (ol_flags & PKT_TX_TCP_UDP_CKSUM) - mss |= ENIC_CALC_TCP_UDP_CKSUM; - } - } - - wq_enet_desc_enc(desc, - bus_addr, - len, - mss, - 0 /* header_length */, - 0 /* offload_mode WQ_ENET_OFFLOAD_MODE_CSUM */, - eop, - cq_entry, - 0 /* fcoe_encap */, - vlan_tag_insert, - vlan_tag, - 0 /* loopback */); - - enic_vnic_post_wq(wq, (void *)tx_pkt, cq_entry); -} - uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { uint16_t index; - unsigned int frags; - unsigned int pkt_len; - unsigned int seg_len; - unsigned int inc_len; + unsigned int pkt_len, data_len; unsigned int nb_segs; - struct rte_mbuf *tx_pkt, *next_tx_pkt; + struct rte_mbuf *tx_pkt; struct vnic_wq *wq = (struct vnic_wq *)tx_queue; struct enic *enic = vnic_dev_priv(wq->vdev); unsigned short vlan_id; unsigned short ol_flags; - uint8_t last_seg, eop; - unsigned int host_tx_descs = 0; + unsigned int wq_desc_avail; + int head_idx; + struct vnic_wq_buf *buf; + unsigned int hw_ip_cksum_enabled; + unsigned int desc_count; + struct wq_enet_desc *descs, *desc_p, desc_tmp; + uint16_t mss; + uint8_t vlan_tag_insert; + uint8_t eop; + uint64_t bus_addr; + enic_cleanup_wq(enic, wq); + wq_desc_avail = vnic_wq_desc_avail(wq); + head_idx = wq->head_idx; + desc_count = wq->ring.desc_count; + + nb_pkts = RTE_MIN(nb_pkts, ENIC_TX_XMIT_MAX); + + hw_ip_cksum_enabled = enic->hw_ip_checksum; for (index = 0; index < nb_pkts; index++) { tx_pkt = *tx_pkts++; - inc_len = 0; nb_segs = tx_pkt->nb_segs; - if (nb_segs > vnic_wq_desc_avail(wq)) { + if (nb_segs > wq_desc_avail) { if (index > 0) - enic_post_wq_index(wq); - - /* wq cleanup and try again */ - if (!enic_cleanup_wq(enic, wq) || - (nb_segs > vnic_wq_desc_avail(wq))) { - return index; - } + goto post; + goto done; } pkt_len = tx_pkt->pkt_len; + data_len = tx_pkt->data_len; vlan_id = tx_pkt->vlan_tci; ol_flags = tx_pkt->ol_flags; - for (frags = 0; inc_len < pkt_len; frags++) { - if (!tx_pkt) - break; - next_tx_pkt = tx_pkt->next; - seg_len = tx_pkt->data_len; - inc_len += seg_len; - - host_tx_descs++; - last_seg = 0; - eop = 0; - if ((pkt_len == inc_len) || !next_tx_pkt) { - eop = 1; - /* post if last packet in batch or > thresh */ - if ((index == (nb_pkts - 1)) || - (host_tx_descs > ENIC_TX_POST_THRESH)) { - last_seg = 1; - host_tx_descs = 0; - } + + mss = 0; + vlan_tag_insert = 0; + bus_addr = (dma_addr_t) + (tx_pkt->buf_physaddr + tx_pkt->data_off); + + descs = (struct wq_enet_desc *)wq->ring.descs; + desc_p = descs + head_idx; + + eop = (data_len == pkt_len); + + if (ol_flags & PKT_TX_VLAN_PKT) + vlan_tag_insert = 1; + + if (hw_ip_cksum_enabled && (ol_flags & PKT_TX_IP_CKSUM)) + mss |= ENIC_CALC_IP_CKSUM; + + if (hw_ip_cksum_enabled && (ol_flags & PKT_TX_TCP_UDP_CKSUM)) + mss |= ENIC_CALC_TCP_UDP_CKSUM; + + wq_enet_desc_enc(&desc_tmp, bus_addr, data_len, mss, 0, 0, eop, + eop, 0, vlan_tag_insert, vlan_id, 0); + + *desc_p = desc_tmp; + buf = &wq->bufs[head_idx]; + buf->mb = (void *)tx_pkt; + head_idx = enic_ring_incr(desc_count, head_idx); + wq_desc_avail--; + + if (!eop) { + for (tx_pkt = tx_pkt->next; tx_pkt; tx_pkt = + tx_pkt->next) { + data_len = tx_pkt->data_len; + + if (tx_pkt->next == NULL) + eop = 1; + desc_p = descs + head_idx; + bus_addr = (dma_addr_t)(tx_pkt->buf_physaddr + + tx_pkt->data_off); + wq_enet_desc_enc((struct wq_enet_desc *) + &desc_tmp, bus_addr, data_len, + mss, 0, 0, eop, eop, 0, + vlan_tag_insert, vlan_id, 0); + + *desc_p = desc_tmp; + buf = &wq->bufs[head_idx]; + buf->mb = (void *)tx_pkt; + head_idx = enic_ring_incr(desc_count, head_idx); + wq_desc_avail--; } - enic_send_pkt(enic, wq, tx_pkt, (unsigned short)seg_len, - !frags, eop, last_seg, ol_flags, vlan_id); - tx_pkt = next_tx_pkt; } } + post: + rte_wmb(); + iowrite32(head_idx, &wq->ctrl->posted_index); + done: + wq->ring.desc_avail = wq_desc_avail; + wq->head_idx = head_idx; - enic_cleanup_wq(enic, wq); return index; } + + -- 2.7.0