On 3 July 2017 at 14:01, Matias Elo <matias....@nokia.com> wrote: > Implements experimental zero-copy mode for DPDK pktio. This can be enabled > with additional '--enable-dpdk-zero-copy' configure flag. > > This feature has been put behind an extra configure flag as it doesn't > entirely adhere to the DPDK API and may behave unexpectedly with untested > DPDK NIC drivers. Zero-copy operation has been tested with pcap, ixgbe, and > i40e drivers. > > Signed-off-by: Matias Elo <matias....@nokia.com> > --- > .../linux-generic/include/odp_buffer_internal.h | 2 +- > .../linux-generic/include/odp_packet_internal.h | 13 + > platform/linux-generic/include/odp_pool_internal.h | 4 + > platform/linux-generic/m4/odp_dpdk.m4 | 14 +- > platform/linux-generic/odp_pool.c | 2 + > platform/linux-generic/pktio/dpdk.c | 676 > ++++++++++++++++----- > 6 files changed, 562 insertions(+), 149 deletions(-) > > diff --git a/platform/linux-generic/include/odp_buffer_internal.h > b/platform/linux-generic/include/odp_buffer_internal.h > index 076abe9..78ea527 100644 > --- a/platform/linux-generic/include/odp_buffer_internal.h > +++ b/platform/linux-generic/include/odp_buffer_internal.h > @@ -109,7 +109,7 @@ struct odp_buffer_hdr_t { > > /* Data or next header */ > uint8_t data[0]; > -}; > +} ODP_ALIGNED_CACHE; > > ODP_STATIC_ASSERT(CONFIG_PACKET_MAX_SEGS < 256, > "CONFIG_PACKET_MAX_SEGS_TOO_LARGE"); > diff --git a/platform/linux-generic/include/odp_packet_internal.h > b/platform/linux-generic/include/odp_packet_internal.h > index 11f2fdc..78569b6 100644 > --- a/platform/linux-generic/include/odp_packet_internal.h > +++ b/platform/linux-generic/include/odp_packet_internal.h > @@ -92,6 +92,12 @@ typedef struct { > uint32_t l4_offset; /**< offset to L4 hdr (TCP, UDP, SCTP, also > ICMP) */ > } packet_parser_t; > > +/* Packet extra data length */ > +#define PKT_EXTRA_LEN 128 > + > +/* Packet extra data types */ > +#define PKT_EXTRA_TYPE_DPDK 1 > + > /** > * Internal Packet header > * > @@ -131,6 +137,13 @@ typedef struct { > /* Result for crypto */ > odp_crypto_generic_op_result_t op_result; > > +#ifdef ODP_PKTIO_DPDK > + /* Type of extra data */ > + uint8_t extra_type; > + /* Extra space for packet descriptors. E.g. 
DPDK mbuf */ > + uint8_t extra[PKT_EXTRA_LEN] ODP_ALIGNED_CACHE; > +#endif > + > /* Packet data storage */ > uint8_t data[0]; > } odp_packet_hdr_t; > diff --git a/platform/linux-generic/include/odp_pool_internal.h > b/platform/linux-generic/include/odp_pool_internal.h > index ebb779d..acea079 100644 > --- a/platform/linux-generic/include/odp_pool_internal.h > +++ b/platform/linux-generic/include/odp_pool_internal.h > @@ -68,6 +68,10 @@ typedef struct pool_t { > uint8_t *base_addr; > uint8_t *uarea_base_addr; > > + /* Used by DPDK zero-copy pktio */ > + void *ext_desc; > + uint16_t ext_ref_count; > + > pool_cache_t local_cache[ODP_THREAD_COUNT_MAX]; > > odp_shm_t ring_shm; > diff --git a/platform/linux-generic/m4/odp_dpdk.m4 > b/platform/linux-generic/m4/odp_dpdk.m4 > index 58d1472..edcc4c8 100644 > --- a/platform/linux-generic/m4/odp_dpdk.m4 > +++ b/platform/linux-generic/m4/odp_dpdk.m4 > @@ -9,6 +9,16 @@ AC_HELP_STRING([--with-dpdk-path=DIR path to dpdk > build directory]), > pktio_dpdk_support=yes],[]) > > ############################################################ > ############## > +# Enable zero-copy DPDK pktio > +########################################################### > ############### > +zero_copy=0 > +AC_ARG_ENABLE([dpdk-zero-copy], > + [ --enable-dpdk-zero-copy enable experimental zero-copy DPDK pktio > mode], > + [if test x$enableval = xyes; then > + zero_copy=1 > + fi]) > + > +########################################################### > ############### > # Save and set temporary compilation flags > ############################################################ > ############## > OLD_CPPFLAGS=$CPPFLAGS > @@ -38,9 +48,9 @@ then > done > DPDK_PMD+=--no-whole-archive > > - ODP_CFLAGS="$ODP_CFLAGS -DODP_PKTIO_DPDK" > + ODP_CFLAGS="$ODP_CFLAGS -DODP_PKTIO_DPDK -DODP_DPDK_ZERO_COPY=$zero_ > copy" > AM_LDFLAGS="$AM_LDFLAGS -L$DPDK_PATH/lib -Wl,$DPDK_PMD" > - LIBS="$LIBS -ldpdk -ldl -lpcap" > + LIBS="$LIBS -ldpdk -ldl -lpcap -lm" > else > pktio_dpdk_support=no > fi > diff --git a/platform/linux-generic/odp_pool.c > b/platform/linux-generic/odp_pool.c > index 5360b94..8a27c8a 100644 > --- a/platform/linux-generic/odp_pool.c > +++ b/platform/linux-generic/odp_pool.c > @@ -395,6 +395,8 @@ static odp_pool_t pool_create(const char *name, > odp_pool_param_t *params, > pool->uarea_size = uarea_size; > pool->shm_size = num * block_size; > pool->uarea_shm_size = num * uarea_size; > + pool->ext_desc = NULL; > + pool->ext_ref_count = 0; > > shm = odp_shm_reserve(pool->name, pool->shm_size, > ODP_PAGE_SIZE, shmflags); > diff --git a/platform/linux-generic/pktio/dpdk.c b/platform/linux-generic/ > pktio/dpdk.c > index c52cd09..154b9dc 100644 > --- a/platform/linux-generic/pktio/dpdk.c > +++ b/platform/linux-generic/pktio/dpdk.c > @@ -11,6 +11,7 @@ > #include <sched.h> > #include <ctype.h> > #include <unistd.h> > +#include <math.h> > > #include <odp/api/cpumask.h> > > @@ -25,10 +26,19 @@ > #include <protocols/eth.h> > > #include <rte_config.h> > +#include <rte_malloc.h> > #include <rte_mbuf.h> > +#include <rte_mempool.h> > #include <rte_ethdev.h> > #include <rte_string_fns.h> > > +#if ODP_DPDK_ZERO_COPY > +ODP_STATIC_ASSERT(CONFIG_PACKET_HEADROOM == RTE_PKTMBUF_HEADROOM, > + "ODP and DPDK headroom sizes not matching!"); > +ODP_STATIC_ASSERT(PKT_EXTRA_LEN >= sizeof(struct rte_mbuf), > + "DPDK rte_mbuf won't fit in odp_packet_hdr_t.extra!"); > +#endif > + > static int disable_pktio; /** !0 this pktio disabled, 0 enabled */ > > /* Has dpdk_pktio_init() been called */ > @@ -58,6 +68,453 @@ void 
refer_constructors(void) > mp_hdlr_init_ops_stack(); > } > > +static inline uint16_t mbuf_data_off(struct rte_mbuf *mbuf, > + odp_packet_hdr_t *pkt_hdr) > +{ > + return (uint64_t)pkt_hdr->buf_hdr.seg[0].data - > + (uint64_t)mbuf->buf_addr; > +} > + > +/** > + * Update mbuf > + * > + * Called always before rte_mbuf is passed to DPDK. > + */ > +static inline void mbuf_update(struct rte_mbuf *mbuf, odp_packet_hdr_t > *pkt_hdr, > + uint16_t pkt_len) > +{ > + mbuf->data_len = pkt_len; > + mbuf->pkt_len = pkt_len; > + mbuf->refcnt = 1; > + > + if (odp_unlikely(pkt_hdr->buf_hdr.base_data != > + pkt_hdr->buf_hdr.seg[0].data)) > + mbuf->data_off = mbuf_data_off(mbuf, pkt_hdr); > +} > + > +/** > + * Initialize mbuf > + * > + * Called once per ODP packet. > + */ > +static void mbuf_init(struct rte_mempool *mp, struct rte_mbuf *mbuf, > + odp_packet_hdr_t *pkt_hdr) > +{ > + void *buf_addr = pkt_hdr->buf_hdr.base_data - RTE_PKTMBUF_HEADROOM; > + > + rte_mem_lock_page(buf_addr); > + > + memset(mbuf, 0, sizeof(struct rte_mbuf)); > + > + mbuf->priv_size = 0; > + mbuf->buf_addr = buf_addr; > + mbuf->buf_physaddr = rte_mem_virt2phy(buf_addr); > + if (odp_unlikely(mbuf->buf_physaddr == RTE_BAD_PHYS_ADDR)) > + ODP_ABORT("Failed to map virt addr to phy"); > + > + mbuf->buf_len = (uint16_t)rte_pktmbuf_data_room_size(mp); > + mbuf->data_off = RTE_PKTMBUF_HEADROOM; > + mbuf->pool = mp; > + mbuf->refcnt = 1; > + mbuf->nb_segs = 1; > + mbuf->port = 0xff; > + > + /* Store ODP packet handle inside rte_mbuf */ > + mbuf->userdata = packet_handle(pkt_hdr); > + pkt_hdr->extra_type = PKT_EXTRA_TYPE_DPDK; > +} > + > +/** > + * Create custom DPDK packet pool > + */ > +static struct rte_mempool *mbuf_pool_create(const char *name, unsigned > num, > + unsigned cache_size, > + uint16_t data_room_size, > + pkt_dpdk_t *cfg) > +{ > + struct rte_mempool *mp; > + struct rte_pktmbuf_pool_private mbp_priv; > + unsigned elt_size; > + > + elt_size = sizeof(struct rte_mbuf) + (unsigned)data_room_size; > + mbp_priv.mbuf_data_room_size = data_room_size; > + mbp_priv.mbuf_priv_size = 0; > + > + mp = rte_mempool_create_empty(name, num, elt_size, cache_size, > + sizeof(struct > rte_pktmbuf_pool_private), > + rte_socket_id(), 0); > + if (mp == NULL) { > + ODP_ERR("Failed to create empty DPDK packet pool\n"); > + return NULL; > + } > + > + if (rte_mempool_set_ops_byname(mp, "odp_pool", cfg)) { > + ODP_ERR("Failed setting mempool operations\n"); > + return NULL; > + } > + > + rte_pktmbuf_pool_init(mp, &mbp_priv); > + > + if (rte_mempool_ops_alloc(mp)) { > + ODP_ERR("Failed allocating mempool\n"); > + return NULL; > + } > + > + return mp; > +} > + > +/** > + * Calculate valid cache size for DPDK packet pool > + */ > +static unsigned cache_size(uint32_t num) > +{ > + unsigned size = 0; > + unsigned i; > + > + if (!RTE_MEMPOOL_CACHE_MAX_SIZE) > + return 0; > + > + i = ceil((double)num / RTE_MEMPOOL_CACHE_MAX_SIZE); > + i = RTE_MAX(i, 2UL); > + for (; i <= (num / 2); ++i) > + if ((num % i) == 0) { > + size = num / i; > + break; > + } > + if (odp_unlikely(size > RTE_MEMPOOL_CACHE_MAX_SIZE || > + (uint32_t)size * 1.5 > num)) { > + ODP_ERR("Cache size calc failure: %d\n", size); > + size = 0; > + } > + > + return size; > +} > + > +/* DPDK external memory pool operations */ > + > +static int pool_enqueue(struct rte_mempool *mp ODP_UNUSED, > + void * const *obj_table, unsigned num) > +{ > + odp_packet_t pkt_tbl[num]; > + unsigned i; > + > + if (odp_unlikely(num == 0)) > + return 0; > + > + for (i = 0; i < num; i++) > + pkt_tbl[i] = (odp_packet_t)((struct 
rte_mbuf *) > + obj_table[i])->userdata; > + > + odp_packet_free_multi(pkt_tbl, num); > + > + return 0; > +} > + > +static int pool_dequeue_bulk(struct rte_mempool *mp, void **obj_table, > + unsigned num) > +{ > + pkt_dpdk_t *pkt_dpdk = mp->pool_config; > + odp_packet_t packet_tbl[num]; > + int pkts; > + int i; > + > + pkts = packet_alloc_multi(pkt_dpdk->pool, pkt_dpdk->data_room, > + packet_tbl, num); > + > + if (odp_unlikely(pkts != (int)num)) { > + if (pkts > 0) > + odp_packet_free_multi(packet_tbl, pkts); > + return -ENOENT; > + } > + > + for (i = 0; i < pkts; i++) { > + odp_packet_t pkt = packet_tbl[i]; > + odp_packet_hdr_t *pkt_hdr = odp_packet_hdr(pkt); > + struct rte_mbuf *mbuf = (struct rte_mbuf *) > + (uintptr_t)pkt_hdr->extra; > + if (pkt_hdr->extra_type != PKT_EXTRA_TYPE_DPDK) > + mbuf_init(mp, mbuf, pkt_hdr); > + obj_table[i] = mbuf; > + } > + > + return 0; > +} > + > +static int pool_alloc(struct rte_mempool *mp) > +{ > + pkt_dpdk_t *pkt_dpdk = mp->pool_config; > + > + mp->pool_data = pkt_dpdk->pool; > + mp->flags |= MEMPOOL_F_POOL_CREATED; > + > + return 0; > +} > + > +static unsigned pool_get_count(const struct rte_mempool *mp) > +{ > + odp_pool_t pool = (odp_pool_t)mp->pool_data; > + odp_pool_info_t info; > + > + if (odp_pool_info(pool, &info)) { > + ODP_ERR("Failed to read pool info\n"); > + return 0; > + } > + return info.params.pkt.num; > +} > + > +static void pool_free(struct rte_mempool *mp) > +{ > + unsigned lcore_id; > + > + RTE_LCORE_FOREACH(lcore_id) { > + struct rte_mempool_cache *cache; > + > + cache = rte_mempool_default_cache(mp, lcore_id); > + if (cache != NULL) > + rte_mempool_cache_flush(cache, mp); > + } > +} > + > +static struct rte_mempool_ops ops_stack = { > + .name = "odp_pool", > + .alloc = pool_alloc, > + .free = pool_free, > + .enqueue = pool_enqueue, > + .dequeue = pool_dequeue_bulk, > + .get_count = pool_get_count > +}; > + > +MEMPOOL_REGISTER_OPS(ops_stack); > + > +static inline int mbuf_to_pkt(pktio_entry_t *pktio_entry, > + odp_packet_t pkt_table[], > + struct rte_mbuf *mbuf_table[], > + uint16_t mbuf_num, odp_time_t *ts) > +{ > + odp_packet_t pkt; > + odp_packet_hdr_t *pkt_hdr; > + uint16_t pkt_len; > + struct rte_mbuf *mbuf; > + void *buf; > + int i, j; > + int nb_pkts = 0; > + int alloc_len, num; > + odp_pool_t pool = pktio_entry->s.pkt_dpdk.pool; > + > + /* Allocate maximum sized packets */ > + alloc_len = pktio_entry->s.pkt_dpdk.data_room; > + > + num = packet_alloc_multi(pool, alloc_len, pkt_table, mbuf_num); > + if (num != mbuf_num) { > + ODP_DBG("packet_alloc_multi() unable to allocate all > packets: " > + "%d/%" PRIu16 " allocated\n", num, mbuf_num); > + for (i = num; i < mbuf_num; i++) > + rte_pktmbuf_free(mbuf_table[i]); > + } > + > + for (i = 0; i < num; i++) { > + odp_packet_hdr_t parsed_hdr; > + > + mbuf = mbuf_table[i]; > + if (odp_unlikely(mbuf->nb_segs != 1)) { > + ODP_ERR("Segmented buffers not supported\n"); > + goto fail; > + } > + > + buf = rte_pktmbuf_mtod(mbuf, char *); > + odp_prefetch(buf); > + > + pkt_len = rte_pktmbuf_pkt_len(mbuf); > + > + if (pktio_cls_enabled(pktio_entry)) { > + if (cls_classify_packet(pktio_entry, > + (const uint8_t *)buf, > + pkt_len, pkt_len, &pool, > + &parsed_hdr)) > + goto fail; > + } > + > + pkt = pkt_table[i]; > + pkt_hdr = odp_packet_hdr(pkt); > + pull_tail(pkt_hdr, alloc_len - pkt_len); > + > + if (odp_packet_copy_from_mem(pkt, 0, pkt_len, buf) != 0) > + goto fail; > + > + pkt_hdr->input = pktio_entry->s.handle; > + > + if (pktio_cls_enabled(pktio_entry)) > + 
copy_packet_cls_metadata(&parsed_hdr, pkt_hdr); > + else > + packet_parse_layer(pkt_hdr, > + pktio_entry->s.config.parser. > layer); > + > + if (mbuf->ol_flags & PKT_RX_RSS_HASH) > + odp_packet_flow_hash_set(pkt, mbuf->hash.rss); > + > + packet_set_ts(pkt_hdr, ts); > + > + pkt_table[nb_pkts++] = pkt; > + > + rte_pktmbuf_free(mbuf); > + } > + > + return nb_pkts; > + > +fail: > + odp_packet_free_multi(&pkt_table[i], num - i); > + > + for (j = i; j < num; j++) > + rte_pktmbuf_free(mbuf_table[j]); > + > + return (i > 0 ? i : -1); > +} > + > +static inline int pkt_to_mbuf(pktio_entry_t *pktio_entry, > + struct rte_mbuf *mbuf_table[], > + const odp_packet_t pkt_table[], uint16_t num) > +{ > + pkt_dpdk_t *pkt_dpdk = &pktio_entry->s.pkt_dpdk; > + int i, j; > + char *data; > + uint16_t pkt_len; > + > + if (odp_unlikely((rte_pktmbuf_alloc_bulk(pkt_dpdk->pkt_pool, > + mbuf_table, num)))) { > + ODP_ERR("Failed to alloc mbuf\n"); > + return 0; > + } > + for (i = 0; i < num; i++) { > + pkt_len = _odp_packet_len(pkt_table[i]); > + > + if (pkt_len > pkt_dpdk->mtu) { > + if (i == 0) > + __odp_errno = EMSGSIZE; > + goto fail; > + } > + > + /* Packet always fits in mbuf */ > + data = rte_pktmbuf_append(mbuf_table[i], pkt_len); > + > + odp_packet_copy_to_mem(pkt_table[i], 0, pkt_len, data); > + } > + return i; > + > +fail: > + for (j = i; j < num; j++) > + rte_pktmbuf_free(mbuf_table[j]); > + > + return i; > +} > + > +static inline int mbuf_to_pkt_zero(pktio_entry_t *pktio_entry, > + odp_packet_t pkt_table[], > + struct rte_mbuf *mbuf_table[], > + uint16_t mbuf_num, odp_time_t *ts) > +{ > + odp_packet_t pkt; > + odp_packet_hdr_t *pkt_hdr; > + uint16_t pkt_len; > + struct rte_mbuf *mbuf; > + void *buf; > + int i; > + int nb_pkts = 0; > + odp_pool_t pool = pktio_entry->s.pkt_dpdk.pool; > + > + for (i = 0; i < mbuf_num; i++) { > + odp_packet_hdr_t parsed_hdr; > + > + mbuf = mbuf_table[i]; > + if (odp_unlikely(mbuf->nb_segs != 1)) { > + ODP_ERR("Segmented buffers not supported\n"); > + rte_pktmbuf_free(mbuf); > + continue; > + } > + > + buf = rte_pktmbuf_mtod(mbuf, char *); > + pkt_len = rte_pktmbuf_pkt_len(mbuf); > + > + pkt = (odp_packet_t)mbuf->userdata; > + pkt_hdr = odp_packet_hdr(pkt); > + > + if (pktio_cls_enabled(pktio_entry)) { > + if (cls_classify_packet(pktio_entry, > + (const uint8_t *)buf, > + pkt_len, pkt_len, &pool, > + &parsed_hdr)) > + ODP_ERR("Unable to classify packet\n"); > + rte_pktmbuf_free(mbuf); > + continue; > + } > + > + /* Init buffer segments. Currently, only single segment > packets > + * are supported. */ > + pkt_hdr->buf_hdr.seg[0].data = buf; > + > + packet_init(pkt_hdr, pkt_len); > + pkt_hdr->input = pktio_entry->s.handle; > + > + if (pktio_cls_enabled(pktio_entry)) > + copy_packet_cls_metadata(&parsed_hdr, pkt_hdr); > + else > + packet_parse_layer(pkt_hdr, > + pktio_entry->s.config.parser. 
> layer); > + > + if (mbuf->ol_flags & PKT_RX_RSS_HASH) > + odp_packet_flow_hash_set(pkt, mbuf->hash.rss); > + > + packet_set_ts(pkt_hdr, ts); > + > + pkt_table[nb_pkts++] = pkt; > + } > + > + return nb_pkts; > +} > + > +static inline int pkt_to_mbuf_zero(pktio_entry_t *pktio_entry, > + struct rte_mbuf *mbuf_table[], > + const odp_packet_t pkt_table[], > uint16_t num, > + uint16_t *seg_count) > +{ > + pkt_dpdk_t *pkt_dpdk = &pktio_entry->s.pkt_dpdk; > + int i; > + > + *seg_count = 0; > + > + for (i = 0; i < num; i++) { > + odp_packet_t pkt = pkt_table[i]; > + odp_packet_hdr_t *pkt_hdr = odp_packet_hdr(pkt); > + struct rte_mbuf *mbuf = (struct rte_mbuf *) > + (uintptr_t)pkt_hdr->extra; > + uint16_t pkt_len = odp_packet_len(pkt); > + > + if (odp_unlikely(pkt_len > pkt_dpdk->mtu)) > + goto fail; > + > + if (odp_likely(pkt_hdr->buf_hdr.segcount == 1)) { > + if (odp_unlikely(pkt_hdr->extra_type != > + PKT_EXTRA_TYPE_DPDK)) > + mbuf_init(pkt_dpdk->pkt_pool, mbuf, > pkt_hdr); > + > + mbuf_update(mbuf, pkt_hdr, pkt_len); > + } else { > + /* Fall back to packet copy */ > + if (odp_unlikely(pkt_to_mbuf(pktio_entry, &mbuf, > + &pkt, 1) != 1)) > + goto fail; > + (*seg_count)++; > + } > + > + mbuf_table[i] = mbuf; > + } > + return i; > + > +fail: > + if (i == 0) > + __odp_errno = EMSGSIZE; > + return i; > +} > + > /* Test if s has only digits or not. Dpdk pktio uses only digits.*/ > static int dpdk_netdev_is_valid(const char *s) > { > @@ -201,13 +658,12 @@ static int dpdk_setup_port(pktio_entry_t > *pktio_entry) > struct rte_eth_conf port_conf = { > .rxmode = { > .mq_mode = ETH_MQ_RX_RSS, > - .max_rx_pkt_len = pkt_dpdk->data_room, > .split_hdr_size = 0, > .header_split = 0, > .hw_ip_checksum = 0, > .hw_vlan_filter = 0, > - .jumbo_frame = 1, > .hw_strip_crc = 0, > + .enable_scatter = 0, > }, > .rx_adv_conf = { > .rss_conf = rss_conf, > @@ -231,6 +687,7 @@ static int dpdk_setup_port(pktio_entry_t *pktio_entry) > static int dpdk_close(pktio_entry_t *pktio_entry) > { > pkt_dpdk_t *pkt_dpdk = &pktio_entry->s.pkt_dpdk; > + pool_t *pool_entry = pool_entry_from_hdl(pkt_dpdk->pool); > unsigned idx; > unsigned i, j; > > @@ -245,7 +702,15 @@ static int dpdk_close(pktio_entry_t *pktio_entry) > if (pktio_entry->s.state != PKTIO_STATE_OPENED) > rte_eth_dev_close(pkt_dpdk->port_id); > > - rte_mempool_free(pkt_dpdk->pkt_pool); > + if (ODP_DPDK_ZERO_COPY) { > + if (pool_entry->ext_desc && pool_entry->ext_ref_count == > 1) { > + rte_mempool_free(pkt_dpdk->pkt_pool); > + pool_entry->ext_desc = NULL; > + } > + } else { > + rte_mempool_free(pkt_dpdk->pkt_pool); > + } > + pool_entry->ext_ref_count--; > > return 0; > } > @@ -444,16 +909,17 @@ static int dpdk_open(odp_pktio_t id ODP_UNUSED, > pkt_dpdk_t *pkt_dpdk = &pktio_entry->s.pkt_dpdk; > struct rte_eth_dev_info dev_info; > struct rte_mempool *pkt_pool; > - odp_pool_info_t pool_info; > uint16_t data_room; > uint32_t mtu; > int i; > + pool_t *pool_entry; > > if (disable_pktio) > return -1; > > if (pool == ODP_POOL_INVALID) > return -1; > + pool_entry = pool_entry_from_hdl(pool); > > if (!dpdk_netdev_is_valid(netdev)) { > ODP_ERR("Invalid dpdk netdev: %s\n", netdev); > @@ -481,11 +947,6 @@ static int dpdk_open(odp_pktio_t id ODP_UNUSED, > return -1; > } > > - if (odp_pool_info(pool, &pool_info) < 0) { > - ODP_ERR("Failed to read pool info\n"); > - return -1; > - } > - > dpdk_init_capability(pktio_entry, &dev_info); > > mtu = dpdk_mtu_get(pktio_entry); > @@ -506,19 +967,37 @@ static int dpdk_open(odp_pktio_t id ODP_UNUSED, > pkt_dpdk->min_rx_burst = DPDK_IXGBE_MIN_RX_BURST; 
>        else
>                pkt_dpdk->min_rx_burst = 0;
> +       if (ODP_DPDK_ZERO_COPY) {
> +               if (pool_entry->ext_desc != NULL) {
> +                       pkt_pool = (struct rte_mempool *)pool_entry->ext_desc;
> +               } else {
> +                       unsigned cache = cache_size(pool_entry->num);
>
> -       pkt_pool = rte_pktmbuf_pool_create(pkt_dpdk->pool_name, DPDK_NB_MBUF,
> -                                          DPDK_MEMPOOL_CACHE_SIZE, 0,
> -                                          DPDK_MBUF_BUF_SIZE, rte_socket_id());
> +                       pkt_pool = mbuf_pool_create(pkt_dpdk->pool_name,
> +                                                   pool_entry->num, cache,
> +                                                   pool_entry->max_seg_len +
> +                                                   CONFIG_PACKET_HEADROOM,
> +                                                   pkt_dpdk);

Instead of passing the whole pkt_dpdk struct, can you just pass odp_pool_t
and calculate data_room in the dequeue_bulk callback? This way the pool
configuration will be clearer and less dependent on pkt_dpdk. Roughly what
I have in mind, as an untested sketch (assumes the matching
rte_mempool_set_ops_byname() call passes the pool handle instead of
pkt_dpdk, and derives data_room from the mempool itself):
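static int pool_dequeue_bulk(struct rte_mempool *mp, void **obj_table,
                             unsigned num)
{
        /* pool_config now holds only the odp_pool_t handle */
        odp_pool_t pool = (odp_pool_t)mp->pool_config;
        /* Recompute data_room from the mempool; ODP and DPDK headroom
         * sizes are already asserted to match */
        uint16_t data_room = rte_pktmbuf_data_room_size(mp) -
                             RTE_PKTMBUF_HEADROOM;
        odp_packet_t packet_tbl[num];
        int pkts, i;

        pkts = packet_alloc_multi(pool, data_room, packet_tbl, num);
        if (odp_unlikely(pkts != (int)num)) {
                if (pkts > 0)
                        odp_packet_free_multi(packet_tbl, pkts);
                return -ENOENT;
        }

        for (i = 0; i < pkts; i++) {
                odp_packet_hdr_t *pkt_hdr = odp_packet_hdr(packet_tbl[i]);
                struct rte_mbuf *mbuf = (struct rte_mbuf *)
                                        (uintptr_t)pkt_hdr->extra;

                if (pkt_hdr->extra_type != PKT_EXTRA_TYPE_DPDK)
                        mbuf_init(mp, mbuf, pkt_hdr);
                obj_table[i] = mbuf;
        }

        return 0;
}

/Krishna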
> + pool_entry->ext_desc = pkt_pool; > + } > + } else { > + pkt_pool = rte_pktmbuf_pool_create(pkt_dpdk->pool_name, > + DPDK_NB_MBUF, > + > DPDK_MEMPOOL_CACHE_SIZE, 0, > + DPDK_MBUF_BUF_SIZE, > + rte_socket_id()); > + } > if (pkt_pool == NULL) { > ODP_ERR("Cannot init mbuf packet pool\n"); > return -1; > } > + pool_entry->ext_ref_count++; > + > pkt_dpdk->pkt_pool = pkt_pool; > > data_room = rte_pktmbuf_data_room_size(pkt_dpdk->pkt_pool) - > RTE_PKTMBUF_HEADROOM; > - pkt_dpdk->data_room = RTE_MIN(pool_info.params.pkt.len, > data_room); > + pkt_dpdk->data_room = RTE_MIN(pool_entry->max_seg_len, data_room); > > /* Mbuf chaining not yet supported */ > pkt_dpdk->mtu = RTE_MIN(pkt_dpdk->mtu, pkt_dpdk->data_room); > @@ -591,129 +1070,6 @@ static int dpdk_stop(pktio_entry_t *pktio_entry) > return 0; > } > > -static inline int mbuf_to_pkt(pktio_entry_t *pktio_entry, > - odp_packet_t pkt_table[], > - struct rte_mbuf *mbuf_table[], > - uint16_t mbuf_num, odp_time_t *ts) > -{ > - odp_packet_t pkt; > - odp_packet_hdr_t *pkt_hdr; > - uint16_t pkt_len; > - struct rte_mbuf *mbuf; > - void *buf; > - int i, j; > - int nb_pkts = 0; > - int alloc_len, num; > - odp_pool_t pool = pktio_entry->s.pkt_dpdk.pool; > - > - /* Allocate maximum sized packets */ > - alloc_len = pktio_entry->s.pkt_dpdk.data_room; > - > - num = packet_alloc_multi(pool, alloc_len, pkt_table, mbuf_num); > - if (num != mbuf_num) { > - ODP_DBG("packet_alloc_multi() unable to allocate all > packets: " > - "%d/%" PRIu16 " allocated\n", num, mbuf_num); > - for (i = num; i < mbuf_num; i++) > - rte_pktmbuf_free(mbuf_table[i]); > - } > - > - for (i = 0; i < num; i++) { > - odp_packet_hdr_t parsed_hdr; > - > - mbuf = mbuf_table[i]; > - if (odp_unlikely(mbuf->nb_segs != 1)) { > - ODP_ERR("Segmented buffers not supported\n"); > - goto fail; > - } > - > - buf = rte_pktmbuf_mtod(mbuf, char *); > - odp_prefetch(buf); > - > - pkt_len = rte_pktmbuf_pkt_len(mbuf); > - > - if (pktio_cls_enabled(pktio_entry)) { > - if (cls_classify_packet(pktio_entry, > - (const uint8_t *)buf, > - pkt_len, pkt_len, &pool, > - &parsed_hdr)) > - goto fail; > - } > - > - pkt = pkt_table[i]; > - pkt_hdr = odp_packet_hdr(pkt); > - pull_tail(pkt_hdr, alloc_len - pkt_len); > - > - /* For now copy the data in the mbuf, > - worry about zero-copy later */ > - if (odp_packet_copy_from_mem(pkt, 0, pkt_len, buf) != 0) > - goto fail; > - > - pkt_hdr->input = pktio_entry->s.handle; > - > - if (pktio_cls_enabled(pktio_entry)) > - copy_packet_cls_metadata(&parsed_hdr, pkt_hdr); > - else > - packet_parse_layer(pkt_hdr, > - pktio_entry->s.config.parser. > layer); > - > - if (mbuf->ol_flags & PKT_RX_RSS_HASH) > - odp_packet_flow_hash_set(pkt, mbuf->hash.rss); > - > - packet_set_ts(pkt_hdr, ts); > - > - pkt_table[nb_pkts++] = pkt; > - > - rte_pktmbuf_free(mbuf); > - } > - > - return nb_pkts; > - > -fail: > - odp_packet_free_multi(&pkt_table[i], num - i); > - > - for (j = i; j < num; j++) > - rte_pktmbuf_free(mbuf_table[j]); > - > - return (i > 0 ? 
i : -1); > -} > - > -static inline int pkt_to_mbuf(pktio_entry_t *pktio_entry, > - struct rte_mbuf *mbuf_table[], > - const odp_packet_t pkt_table[], uint16_t num) > -{ > - pkt_dpdk_t *pkt_dpdk = &pktio_entry->s.pkt_dpdk; > - int i, j; > - char *data; > - uint16_t pkt_len; > - > - if (odp_unlikely((rte_pktmbuf_alloc_bulk(pkt_dpdk->pkt_pool, > - mbuf_table, num)))) { > - ODP_ERR("Failed to alloc mbuf\n"); > - return 0; > - } > - for (i = 0; i < num; i++) { > - pkt_len = _odp_packet_len(pkt_table[i]); > - > - if (pkt_len > pkt_dpdk->mtu) { > - if (i == 0) > - __odp_errno = EMSGSIZE; > - goto fail; > - } > - > - /* Packet always fits in mbuf */ > - data = rte_pktmbuf_append(mbuf_table[i], pkt_len); > - > - odp_packet_copy_to_mem(pkt_table[i], 0, pkt_len, data); > - } > - return i; > - > -fail: > - for (j = i; j < num; j++) > - rte_pktmbuf_free(mbuf_table[j]); > - > - return i; > -} > - > static int dpdk_recv(pktio_entry_t *pktio_entry, int index, > odp_packet_t pkt_table[], int num) > { > @@ -777,8 +1133,12 @@ static int dpdk_recv(pktio_entry_t *pktio_entry, int > index, > ts_val = odp_time_global(); > ts = &ts_val; > } > - nb_rx = mbuf_to_pkt(pktio_entry, pkt_table, rx_mbufs, > nb_rx, > - ts); > + if (ODP_DPDK_ZERO_COPY) > + nb_rx = mbuf_to_pkt_zero(pktio_entry, pkt_table, > + rx_mbufs, nb_rx, ts); > + else > + nb_rx = mbuf_to_pkt(pktio_entry, pkt_table, > rx_mbufs, > + nb_rx, ts); > } > > return nb_rx; > @@ -789,6 +1149,7 @@ static int dpdk_send(pktio_entry_t *pktio_entry, int > index, > { > struct rte_mbuf *tx_mbufs[num]; > pkt_dpdk_t *pkt_dpdk = &pktio_entry->s.pkt_dpdk; > + uint16_t seg_count = 0; > int tx_pkts; > int i; > int mbufs; > @@ -796,7 +1157,11 @@ static int dpdk_send(pktio_entry_t *pktio_entry, int > index, > if (odp_unlikely(pktio_entry->s.state != PKTIO_STATE_STARTED)) > return 0; > > - mbufs = pkt_to_mbuf(pktio_entry, tx_mbufs, pkt_table, num); > + if (ODP_DPDK_ZERO_COPY) > + mbufs = pkt_to_mbuf_zero(pktio_entry, tx_mbufs, pkt_table, > num, > + &seg_count); > + else > + mbufs = pkt_to_mbuf(pktio_entry, tx_mbufs, pkt_table, num); > > if (!pkt_dpdk->lockless_tx) > odp_ticketlock_lock(&pkt_dpdk->tx_lock[index]); > @@ -807,15 +1172,34 @@ static int dpdk_send(pktio_entry_t *pktio_entry, > int index, > if (!pkt_dpdk->lockless_tx) > odp_ticketlock_unlock(&pkt_dpdk->tx_lock[index]); > > - if (odp_unlikely(tx_pkts < num)) { > - for (i = tx_pkts; i < mbufs; i++) > - rte_pktmbuf_free(tx_mbufs[i]); > - } > + if (ODP_DPDK_ZERO_COPY) { > + /* Free copied segmented packets */ > + if (odp_unlikely(seg_count)) { > + uint16_t freed = 0; > > - if (odp_unlikely(tx_pkts == 0)) { > - if (__odp_errno != 0) > - return -1; > + for (i = 0; i < mbufs && freed != seg_count; i++) { > + odp_packet_t pkt = pkt_table[i]; > + odp_packet_hdr_t *pkt_hdr = > odp_packet_hdr(pkt); > + > + if (pkt_hdr->buf_hdr.segcount > 1) { > + if (odp_likely(i < tx_pkts)) > + odp_packet_free(pkt); > + else > + > rte_pktmbuf_free(tx_mbufs[i]); > + freed++; > + } > + } > + } > + if (odp_unlikely(tx_pkts == 0 && __odp_errno != 0)) > + return -1; > } else { > + if (odp_unlikely(tx_pkts < mbufs)) { > + for (i = tx_pkts; i < mbufs; i++) > + rte_pktmbuf_free(tx_mbufs[i]); > + } > + > + if (odp_unlikely(tx_pkts == 0 && __odp_errno != 0)) > + return -1; > odp_packet_free_multi(pkt_table, tx_pkts); > } > > -- > 2.7.4 > >
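For reference, per the odp_dpdk.m4 change above, ODP_DPDK_ZERO_COPY is only
set to 1 when DPDK pktio support is enabled and the new flag is given, i.e.
something like './configure --with-dpdk-path=<dpdk build dir>
--enable-dpdk-zero-copy' (path is a placeholder).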