The IDPF split-queue completion queue (CQ) was using ci_tx_queue, a structure designed for TX descriptor queues, wasting ~96 bytes per completion queue. Additionally, the CQ-only fields (compl_ring, txqs, tx_start_qid, expected_gen_id) bloated ci_tx_queue for every other Intel driver that shares it.
Introduce struct idpf_complq with exactly the fields needed by the completion queue. This brings the CQ allocation down from ~150 bytes to ~48 bytes. Suggested-by: Bruce Richardson <[email protected]> Signed-off-by: Shaiq Wani <[email protected]> --- drivers/net/intel/common/tx.h | 11 +++-------- drivers/net/intel/cpfl/cpfl_ethdev.h | 2 +- drivers/net/intel/cpfl/cpfl_rxtx.c | 11 ++++++----- drivers/net/intel/idpf/idpf_common_rxtx.c | 4 ++-- drivers/net/intel/idpf/idpf_common_rxtx.h | 16 +++++++++++++++- drivers/net/intel/idpf/idpf_common_rxtx_avx2.c | 4 ++-- drivers/net/intel/idpf/idpf_common_rxtx_avx512.c | 4 ++-- drivers/net/intel/idpf/idpf_rxtx.c | 2 +- 8 files changed, 32 insertions(+), 22 deletions(-) diff --git a/drivers/net/intel/common/tx.h b/drivers/net/intel/common/tx.h index 283bd58d5d..9da9366046 100644 --- a/drivers/net/intel/common/tx.h +++ b/drivers/net/intel/common/tx.h @@ -114,6 +114,7 @@ struct ci_tx_desc { /* forward declaration of the common intel (ci) queue structure */ struct ci_tx_queue; +struct idpf_complq; /** * Structure associated with each descriptor of the TX ring of a TX queue. 
@@ -209,18 +210,12 @@ struct ci_tx_queue { uint8_t vf_ctx_initialized; /**< VF context descriptors initialized */ }; struct { /* idpf specific values */ - volatile union { - struct idpf_flex_tx_sched_desc *desc_ring; - struct idpf_splitq_tx_compl_desc *compl_ring; - }; - struct ci_tx_queue *complq; - void **txqs; /*only valid for split queue mode*/ - uint32_t tx_start_qid; + struct idpf_flex_tx_sched_desc *desc_ring; + struct idpf_complq *complq; uint32_t latch_idx; /* Tx timestamp latch index */ uint16_t sw_nb_desc; uint16_t sw_tail; uint16_t rs_compl_count; - uint8_t expected_gen_id; }; }; }; diff --git a/drivers/net/intel/cpfl/cpfl_ethdev.h b/drivers/net/intel/cpfl/cpfl_ethdev.h index e05a0901d5..d26b2bb0dc 100644 --- a/drivers/net/intel/cpfl/cpfl_ethdev.h +++ b/drivers/net/intel/cpfl/cpfl_ethdev.h @@ -188,7 +188,7 @@ struct cpfl_vport { uint16_t nb_p2p_txq; struct idpf_rx_queue *p2p_rx_bufq; - struct ci_tx_queue *p2p_tx_complq; + struct idpf_complq *p2p_tx_complq; bool p2p_manual_bind; }; diff --git a/drivers/net/intel/cpfl/cpfl_rxtx.c b/drivers/net/intel/cpfl/cpfl_rxtx.c index ad622b267d..e7e370a208 100644 --- a/drivers/net/intel/cpfl/cpfl_rxtx.c +++ b/drivers/net/intel/cpfl/cpfl_rxtx.c @@ -27,7 +27,7 @@ cpfl_tx_hairpin_descq_reset(struct ci_tx_queue *txq) } static inline void -cpfl_tx_hairpin_complq_reset(struct ci_tx_queue *cq) +cpfl_tx_hairpin_complq_reset(struct idpf_complq *cq) { uint32_t i, size; @@ -483,7 +483,7 @@ cpfl_tx_complq_setup(struct rte_eth_dev *dev, struct ci_tx_queue *txq, struct cpfl_vport *cpfl_vport = dev->data->dev_private; struct idpf_vport *vport = &cpfl_vport->base; const struct rte_memzone *mz; - struct ci_tx_queue *cq; + struct idpf_complq *cq; int ret; cq = rte_zmalloc_socket("cpfl splitq cq", @@ -813,7 +813,8 @@ cpfl_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, struct cpfl_txq_hairpin_info *hairpin_info; struct idpf_hw *hw = &adapter_base->hw; struct cpfl_tx_queue *cpfl_txq; - struct ci_tx_queue *txq, *cq; + 
struct ci_tx_queue *txq; + struct idpf_complq *cq; const struct rte_memzone *mz; uint32_t ring_size; uint16_t peer_port, peer_q; @@ -894,7 +895,7 @@ cpfl_tx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, logic_qid, cpfl_vport->p2p_q_chunks_info->tx_qtail_spacing); if (cpfl_vport->p2p_tx_complq == NULL) { cq = rte_zmalloc_socket("cpfl hairpin cq", - sizeof(struct ci_tx_queue), + sizeof(struct idpf_complq), RTE_CACHE_LINE_SIZE, dev->device->numa_node); if (!cq) { @@ -996,7 +997,7 @@ cpfl_hairpin_rxq_config(struct idpf_vport *vport, struct cpfl_rx_queue *cpfl_rxq int cpfl_hairpin_tx_complq_config(struct cpfl_vport *cpfl_vport) { - struct ci_tx_queue *tx_complq = cpfl_vport->p2p_tx_complq; + struct idpf_complq *tx_complq = cpfl_vport->p2p_tx_complq; struct virtchnl2_txq_info txq_info; memset(&txq_info, 0, sizeof(txq_info)); diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.c b/drivers/net/intel/idpf/idpf_common_rxtx.c index f73716e57c..f69ae8b5f0 100644 --- a/drivers/net/intel/idpf/idpf_common_rxtx.c +++ b/drivers/net/intel/idpf/idpf_common_rxtx.c @@ -237,7 +237,7 @@ idpf_qc_split_tx_descq_reset(struct ci_tx_queue *txq) RTE_EXPORT_INTERNAL_SYMBOL(idpf_qc_split_tx_complq_reset) void -idpf_qc_split_tx_complq_reset(struct ci_tx_queue *cq) +idpf_qc_split_tx_complq_reset(struct idpf_complq *cq) { uint32_t i, size; @@ -782,7 +782,7 @@ idpf_dp_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, } static inline void -idpf_split_tx_free(struct ci_tx_queue *cq) +idpf_split_tx_free(struct idpf_complq *cq) { volatile struct idpf_splitq_tx_compl_desc *compl_ring = cq->compl_ring; volatile struct idpf_splitq_tx_compl_desc *txd; diff --git a/drivers/net/intel/idpf/idpf_common_rxtx.h b/drivers/net/intel/idpf/idpf_common_rxtx.h index f512700d5f..b2d33287df 100644 --- a/drivers/net/intel/idpf/idpf_common_rxtx.h +++ b/drivers/net/intel/idpf/idpf_common_rxtx.h @@ -181,6 +181,20 @@ struct idpf_rxq_ops { void (*release_mbufs)(struct idpf_rx_queue *rxq); }; +/* 
Dedicated completion queue structure for IDPF split queue model. */ +struct idpf_complq { + volatile struct idpf_splitq_tx_compl_desc *compl_ring; + void **txqs; + rte_iova_t tx_ring_dma; + const struct rte_memzone *mz; + uint32_t tx_start_qid; + uint16_t nb_tx_desc; + uint16_t tx_tail; + uint16_t queue_id; + uint16_t port_id; + uint8_t expected_gen_id; +}; + extern int idpf_timestamp_dynfield_offset; extern uint64_t idpf_timestamp_dynflag; @@ -202,7 +216,7 @@ void idpf_qc_single_rx_queue_reset(struct idpf_rx_queue *rxq); __rte_internal void idpf_qc_split_tx_descq_reset(struct ci_tx_queue *txq); __rte_internal -void idpf_qc_split_tx_complq_reset(struct ci_tx_queue *cq); +void idpf_qc_split_tx_complq_reset(struct idpf_complq *cq); __rte_internal void idpf_splitq_rearm_common(struct idpf_rx_queue *rx_bufq); __rte_internal diff --git a/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c b/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c index db7728afad..3fb2efdb56 100644 --- a/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c +++ b/drivers/net/intel/idpf/idpf_common_rxtx_avx2.c @@ -781,9 +781,9 @@ idpf_dp_singleq_xmit_pkts_avx2(void *tx_queue, struct rte_mbuf **tx_pkts, } static __rte_always_inline void -idpf_splitq_scan_cq_ring(struct ci_tx_queue *cq) +idpf_splitq_scan_cq_ring(struct idpf_complq *cq) { - struct idpf_splitq_tx_compl_desc *compl_ring; + volatile struct idpf_splitq_tx_compl_desc *compl_ring; struct ci_tx_queue *txq; uint16_t genid, txq_qid, cq_qid, i; uint8_t ctype; diff --git a/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c b/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c index 9af275cd9d..8db4c64106 100644 --- a/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c +++ b/drivers/net/intel/idpf/idpf_common_rxtx_avx512.c @@ -1101,9 +1101,9 @@ idpf_dp_singleq_xmit_pkts_avx512(void *tx_queue, struct rte_mbuf **tx_pkts, } static __rte_always_inline void -idpf_splitq_scan_cq_ring(struct ci_tx_queue *cq) +idpf_splitq_scan_cq_ring(struct idpf_complq *cq) { - struct 
idpf_splitq_tx_compl_desc *compl_ring; + volatile struct idpf_splitq_tx_compl_desc *compl_ring; struct ci_tx_queue *txq; uint16_t genid, txq_qid, cq_qid, i; uint8_t ctype; diff --git a/drivers/net/intel/idpf/idpf_rxtx.c b/drivers/net/intel/idpf/idpf_rxtx.c index b316c77b62..31005ce210 100644 --- a/drivers/net/intel/idpf/idpf_rxtx.c +++ b/drivers/net/intel/idpf/idpf_rxtx.c @@ -360,7 +360,7 @@ idpf_tx_complq_setup(struct rte_eth_dev *dev, struct ci_tx_queue *txq, { struct idpf_vport *vport = dev->data->dev_private; const struct rte_memzone *mz; - struct ci_tx_queue *cq; + struct idpf_complq *cq; int ret; cq = rte_zmalloc_socket("idpf splitq cq", -- 2.43.0

