Linearize Tx mbuf chains in the info array. This avoids walking the mbuf chain during flush. Move a few branches out of the hot path.
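As an illustration of the approach, here is a minimal standalone sketch using hypothetical stand-in types (struct seg, post_linearize, flush_free are invented for this note; they are not driver code): on post, every segment pointer of the mbuf chain is stashed into the descriptor's fixed info[] slots; on flush, the segments are freed with an indexed loop instead of a dependent next-pointer walk.

/* Illustrative sketch only: hypothetical stand-in types, not driver code. */
#include <stdio.h>

#define NUM_SEGS 4	/* hypothetical per-descriptor segment limit */

struct seg {		/* stand-in for one rte_mbuf segment */
	struct seg *next;
	int id;
};

/* Post path: linearize the chain into fixed info[] slots. */
static void post_linearize(void *info[NUM_SEGS], struct seg *head)
{
	struct seg *s;
	int i = 0;

	for (s = head; s != NULL && i < NUM_SEGS; s = s->next)
		info[i++] = s;
}

/* Flush path: free by indexed loads; no pointer chase through the chain. */
static void flush_free(void *info[NUM_SEGS])
{
	int i;

	for (i = 0; i < NUM_SEGS; i++) {
		struct seg *s = info[i];

		if (s == NULL)
			break;

		printf("free segment %d\n", s->id);	/* rte_pktmbuf_free_seg() */
		info[i] = NULL;
	}
}

int main(void)
{
	struct seg c = { NULL, 2 }, b = { &c, 1 }, a = { &b, 0 };
	void *info[NUM_SEGS] = { NULL };

	post_linearize(info, &a);
	flush_free(info);
	return 0;
}

The indexed loads in the flush loop are independent of one another, so they can be prefetched ahead of time; txm->next can only be read after txm itself is in cache.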
Signed-off-by: Andrew Boyer <andrew.bo...@amd.com>
---
 drivers/net/ionic/ionic_lif.c  |   2 +-
 drivers/net/ionic/ionic_rxtx.c | 143 ++++++++++++++++++++-------------
 2 files changed, 87 insertions(+), 58 deletions(-)

diff --git a/drivers/net/ionic/ionic_lif.c b/drivers/net/ionic/ionic_lif.c
index db5d42dda6..ac9b69fc70 100644
--- a/drivers/net/ionic/ionic_lif.c
+++ b/drivers/net/ionic/ionic_lif.c
@@ -817,7 +817,7 @@ ionic_tx_qcq_alloc(struct ionic_lif *lif, uint32_t socket_id, uint32_t index,
 			"tx",
 			flags,
 			ntxq_descs,
-			1,
+			num_segs_fw,
 			sizeof(struct ionic_txq_desc),
 			sizeof(struct ionic_txq_comp),
 			sizeof(struct ionic_txq_sg_desc_v1),
diff --git a/drivers/net/ionic/ionic_rxtx.c b/drivers/net/ionic/ionic_rxtx.c
index bb6ca019d9..53b0add228 100644
--- a/drivers/net/ionic/ionic_rxtx.c
+++ b/drivers/net/ionic/ionic_rxtx.c
@@ -64,7 +64,7 @@ ionic_tx_empty(struct ionic_tx_qcq *txq)
 {
 	struct ionic_queue *q = &txq->qcq.q;
 
-	ionic_empty_array(q->info, q->num_descs, 0);
+	ionic_empty_array(q->info, q->num_descs * q->num_segs, 0);
 }
 
 static void __rte_cold
@@ -102,50 +102,49 @@ ionic_tx_flush(struct ionic_tx_qcq *txq)
 {
 	struct ionic_cq *cq = &txq->qcq.cq;
 	struct ionic_queue *q = &txq->qcq.q;
-	struct rte_mbuf *txm, *next;
-	struct ionic_txq_comp *cq_desc_base = cq->base;
-	struct ionic_txq_comp *cq_desc;
+	struct rte_mbuf *txm;
+	struct ionic_txq_comp *cq_desc, *cq_desc_base = cq->base;
 	void **info;
-	u_int32_t comp_index = (u_int32_t)-1;
+	uint32_t i;
 
 	cq_desc = &cq_desc_base[cq->tail_idx];
+
 	while (color_match(cq_desc->color, cq->done_color)) {
 		cq->tail_idx = Q_NEXT_TO_SRVC(cq, 1);
-
-		/* Prefetch the next 4 descriptors (not really useful here) */
-		if ((cq->tail_idx & 0x3) == 0)
-			rte_prefetch0(&cq_desc_base[cq->tail_idx]);
-
 		if (cq->tail_idx == 0)
 			cq->done_color = !cq->done_color;
 
-		comp_index = cq_desc->comp_index;
+		/* Prefetch 4 x 16B comp at cq->tail_idx + 4 */
+		if ((cq->tail_idx & 0x3) == 0)
+			rte_prefetch0(&cq_desc_base[Q_NEXT_TO_SRVC(cq, 4)]);
 
-		cq_desc = &cq_desc_base[cq->tail_idx];
-	}
+		while (q->tail_idx != rte_le_to_cpu_16(cq_desc->comp_index)) {
+			/* Prefetch 8 mbuf ptrs at q->tail_idx + 2 */
+			rte_prefetch0(IONIC_INFO_PTR(q, Q_NEXT_TO_SRVC(q, 2)));
 
-	if (comp_index != (u_int32_t)-1) {
-		while (q->tail_idx != comp_index) {
-			info = IONIC_INFO_PTR(q, q->tail_idx);
+			/* Prefetch next mbuf */
+			void **next_info =
+				IONIC_INFO_PTR(q, Q_NEXT_TO_SRVC(q, 1));
+			if (next_info[0])
+				rte_mbuf_prefetch_part2(next_info[0]);
+			if (next_info[1])
+				rte_mbuf_prefetch_part2(next_info[1]);
 
-			q->tail_idx = Q_NEXT_TO_SRVC(q, 1);
+			info = IONIC_INFO_PTR(q, q->tail_idx);
+			for (i = 0; i < q->num_segs; i++) {
+				txm = info[i];
+				if (!txm)
+					break;
 
-			/* Prefetch the next 4 descriptors */
-			if ((q->tail_idx & 0x3) == 0)
-				/* q desc info */
-				rte_prefetch0(&q->info[q->tail_idx]);
-
-			/*
-			 * Note: you can just use rte_pktmbuf_free,
-			 * but this loop is faster
-			 */
-			txm = info[0];
-			while (txm != NULL) {
-				next = txm->next;
 				rte_pktmbuf_free_seg(txm);
-				txm = next;
+
+				info[i] = NULL;
 			}
+
+			q->tail_idx = Q_NEXT_TO_SRVC(q, 1);
 		}
+
+		cq_desc = &cq_desc_base[cq->tail_idx];
 	}
 }
 
@@ -327,9 +326,12 @@ ionic_tx_tso_post(struct ionic_queue *q, struct ionic_txq_desc *desc,
 		uint16_t vlan_tci, bool has_vlan,
 		bool start, bool done)
 {
+	struct rte_mbuf *txm_seg;
 	void **info;
 	uint64_t cmd;
 	uint8_t flags = 0;
+	int i;
+
 	flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0;
 	flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0;
 	flags |= start ? IONIC_TXQ_DESC_FLAG_TSO_SOT : 0;
@@ -345,7 +347,13 @@ ionic_tx_tso_post(struct ionic_queue *q, struct ionic_txq_desc *desc,
 
 	if (done) {
 		info = IONIC_INFO_PTR(q, q->head_idx);
-		info[0] = txm;
+
+		/* Walk the mbuf chain to stash pointers in the array */
+		txm_seg = txm;
+		for (i = 0; i < txm->nb_segs; i++) {
+			info[i] = txm_seg;
+			txm_seg = txm_seg->next;
+		}
 	}
 
 	q->head_idx = Q_NEXT_TO_POST(q, 1);
@@ -497,8 +505,7 @@ ionic_tx(struct ionic_tx_qcq *txq, struct rte_mbuf *txm)
 	struct ionic_tx_stats *stats = &txq->stats;
 	struct rte_mbuf *txm_seg;
 	void **info;
-	bool encap;
-	bool has_vlan;
+	rte_iova_t data_iova;
 	uint64_t ol_flags = txm->ol_flags;
 	uint64_t addr, cmd;
 	uint8_t opcode = IONIC_TXQ_DESC_OPCODE_CSUM_NONE;
@@ -524,32 +531,44 @@ ionic_tx(struct ionic_tx_qcq *txq, struct rte_mbuf *txm)
 	if (opcode == IONIC_TXQ_DESC_OPCODE_CSUM_NONE)
 		stats->no_csum++;
 
-	has_vlan = (ol_flags & RTE_MBUF_F_TX_VLAN);
-	encap = ((ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM) ||
-		(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM)) &&
-		((ol_flags & RTE_MBUF_F_TX_OUTER_IPV4) ||
-		(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6));
+	if (((ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM) ||
+	     (ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM)) &&
+	    ((ol_flags & RTE_MBUF_F_TX_OUTER_IPV4) ||
+	     (ol_flags & RTE_MBUF_F_TX_OUTER_IPV6))) {
+		flags |= IONIC_TXQ_DESC_FLAG_ENCAP;
+	}
 
-	flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0;
-	flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0;
+	if (ol_flags & RTE_MBUF_F_TX_VLAN) {
+		flags |= IONIC_TXQ_DESC_FLAG_VLAN;
+		desc->vlan_tci = rte_cpu_to_le_16(txm->vlan_tci);
+	}
 
 	addr = rte_cpu_to_le_64(rte_mbuf_data_iova(txm));
 
 	cmd = encode_txq_desc_cmd(opcode, flags, txm->nb_segs - 1, addr);
 	desc->cmd = rte_cpu_to_le_64(cmd);
 	desc->len = rte_cpu_to_le_16(txm->data_len);
-	desc->vlan_tci = rte_cpu_to_le_16(txm->vlan_tci);
 
 	info[0] = txm;
 
-	elem = sg_desc_base[q->head_idx].elems;
+	if (txm->nb_segs > 1) {
+		txm_seg = txm->next;
 
-	txm_seg = txm->next;
-	while (txm_seg != NULL) {
-		elem->len = rte_cpu_to_le_16(txm_seg->data_len);
-		elem->addr = rte_cpu_to_le_64(rte_mbuf_data_iova(txm_seg));
-		elem++;
-		txm_seg = txm_seg->next;
+		elem = sg_desc_base[q->head_idx].elems;
+
+		while (txm_seg != NULL) {
+			/* Stash the mbuf ptr in the array */
+			info++;
+			*info = txm_seg;
+
+			/* Configure the SGE */
+			data_iova = rte_mbuf_data_iova(txm_seg);
+			elem->len = rte_cpu_to_le_16(txm_seg->data_len);
+			elem->addr = rte_cpu_to_le_64(data_iova);
+			elem++;
+
+			txm_seg = txm_seg->next;
+		}
 	}
 
 	q->head_idx = Q_NEXT_TO_POST(q, 1);
@@ -565,11 +584,19 @@ ionic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct ionic_queue *q = &txq->qcq.q;
 	struct ionic_tx_stats *stats = &txq->stats;
 	struct rte_mbuf *mbuf;
-	uint32_t next_q_head_idx;
 	uint32_t bytes_tx = 0;
 	uint16_t nb_avail, nb_tx = 0;
 	int err;
 
+	struct ionic_txq_desc *desc_base = q->base;
+	rte_prefetch0(&desc_base[q->head_idx]);
+	rte_prefetch0(IONIC_INFO_PTR(q, q->head_idx));
+
+	if (tx_pkts) {
+		rte_mbuf_prefetch_part1(tx_pkts[0]);
+		rte_mbuf_prefetch_part2(tx_pkts[0]);
+	}
+
 	/* Cleaning old buffers */
 	ionic_tx_flush(txq);
 
@@ -580,11 +607,13 @@ ionic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	}
 
 	while (nb_tx < nb_pkts) {
-		next_q_head_idx = Q_NEXT_TO_POST(q, 1);
-		if ((next_q_head_idx & 0x3) == 0) {
-			struct ionic_txq_desc *desc_base = q->base;
-			rte_prefetch0(&desc_base[next_q_head_idx]);
-			rte_prefetch0(&q->info[next_q_head_idx]);
+		uint16_t next_idx = Q_NEXT_TO_POST(q, 1);
+		rte_prefetch0(&desc_base[next_idx]);
+		rte_prefetch0(IONIC_INFO_PTR(q, next_idx));
+
+		if (nb_tx + 1 < nb_pkts) {
+			rte_mbuf_prefetch_part1(tx_pkts[nb_tx + 1]);
+			rte_mbuf_prefetch_part2(tx_pkts[nb_tx + 1]);
 		}
 
 		mbuf = tx_pkts[nb_tx];
@@ -605,10 +634,10 @@ ionic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	if (nb_tx > 0) {
 		rte_wmb();
 		ionic_q_flush(q);
-	}
 
-	stats->packets += nb_tx;
-	stats->bytes += bytes_tx;
+		stats->packets += nb_tx;
+		stats->bytes += bytes_tx;
+	}
 
 	return nb_tx;
 }
-- 
2.17.1