There are some holes in the data structure lcore_conf. The holes are due to alignment requirements.
For struct lcore_rx_queue, there is no need to make every element of this type cache-line aligned, because the data is not shared between cores. Member len of struct mbuf_table can be moved out into lcore_conf, so the data can be packed and there is no need to load an extra cache line when the mbuf table is empty. The change showed a slight performance improvement on the N1SDP platform. Suggested-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com> Signed-off-by: Ruifeng Wang <ruifeng.w...@arm.com> --- examples/l3fwd/l3fwd.h | 12 ++++++------ examples/l3fwd/l3fwd_common.h | 4 ++-- examples/l3fwd/l3fwd_em.c | 6 +++--- examples/l3fwd/l3fwd_lpm.c | 6 +++--- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index 2cf06099e..f3a301e12 100644 --- a/examples/l3fwd/l3fwd.h +++ b/examples/l3fwd/l3fwd.h @@ -57,22 +57,22 @@ #define HASH_ENTRY_NUMBER_DEFAULT 4 struct mbuf_table { - uint16_t len; struct rte_mbuf *m_table[MAX_PKT_BURST]; }; struct lcore_rx_queue { uint16_t port_id; uint8_t queue_id; -} __rte_cache_aligned; +}; struct lcore_conf { - uint16_t n_rx_queue; struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE]; - uint16_t n_tx_port; uint16_t tx_port_id[RTE_MAX_ETHPORTS]; uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; + uint16_t tx_mbuf_len[RTE_MAX_ETHPORTS]; struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS]; + uint16_t n_rx_queue; + uint16_t n_tx_port; void *ipv4_lookup_struct; void *ipv6_lookup_struct; } __rte_cache_aligned; @@ -122,7 +122,7 @@ send_single_packet(struct lcore_conf *qconf, { uint16_t len; - len = qconf->tx_mbufs[port].len; + len = qconf->tx_mbuf_len[port]; qconf->tx_mbufs[port].m_table[len] = m; len++; @@ -132,7 +132,7 @@ send_single_packet(struct lcore_conf *qconf, len = 0; } - qconf->tx_mbufs[port].len = len; + qconf->tx_mbuf_len[port] = len; return 0; } diff --git a/examples/l3fwd/l3fwd_common.h b/examples/l3fwd/l3fwd_common.h index 7d83ff641..05e03dbfc 100644 --- a/examples/l3fwd/l3fwd_common.h +++ 
b/examples/l3fwd/l3fwd_common.h @@ -183,7 +183,7 @@ send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[], { uint32_t len, j, n; - len = qconf->tx_mbufs[port].len; + len = qconf->tx_mbuf_len[port]; /* * If TX buffer for that queue is empty, and we have enough packets, @@ -258,7 +258,7 @@ send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[], } } - qconf->tx_mbufs[port].len = len; + qconf->tx_mbuf_len[port] = len; } #endif /* _L3FWD_COMMON_H_ */ diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index 9996bfba3..1970e0376 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -662,12 +662,12 @@ em_main_loop(__rte_unused void *dummy) for (i = 0; i < qconf->n_tx_port; ++i) { portid = qconf->tx_port_id[i]; - if (qconf->tx_mbufs[portid].len == 0) + if (qconf->tx_mbuf_len[portid] == 0) continue; send_burst(qconf, - qconf->tx_mbufs[portid].len, + qconf->tx_mbuf_len[portid], portid); - qconf->tx_mbufs[portid].len = 0; + qconf->tx_mbuf_len[portid] = 0; } prev_tsc = cur_tsc; diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c index d338590b9..e62139a0e 100644 --- a/examples/l3fwd/l3fwd_lpm.c +++ b/examples/l3fwd/l3fwd_lpm.c @@ -220,12 +220,12 @@ lpm_main_loop(__rte_unused void *dummy) for (i = 0; i < n_tx_p; ++i) { portid = qconf->tx_port_id[i]; - if (qconf->tx_mbufs[portid].len == 0) + if (qconf->tx_mbuf_len[portid] == 0) continue; send_burst(qconf, - qconf->tx_mbufs[portid].len, + qconf->tx_mbuf_len[portid], portid); - qconf->tx_mbufs[portid].len = 0; + qconf->tx_mbuf_len[portid] = 0; } prev_tsc = cur_tsc; -- 2.25.1