Initial implementation of the DPDK pktio type. Initialization code copied from the odp-dpdk branch.
Reviewed-by: Petri Savolainen <petri.savolai...@nokia.com> Signed-off-by: Matias Elo <matias....@nokia.com> --- platform/linux-generic/include/odp_packet_dpdk.h | 24 + platform/linux-generic/pktio/dpdk.c | 561 +++++++++++++++++++++-- 2 files changed, 559 insertions(+), 26 deletions(-) diff --git a/platform/linux-generic/include/odp_packet_dpdk.h b/platform/linux-generic/include/odp_packet_dpdk.h index cc02fd7..d05d4e0 100644 --- a/platform/linux-generic/include/odp_packet_dpdk.h +++ b/platform/linux-generic/include/odp_packet_dpdk.h @@ -10,10 +10,34 @@ #include <odp/api/packet_io.h> #include <odp/api/pool.h> +#include <net/if.h> + +#ifdef ODP_PKTIO_DPDK +#include <rte_config.h> +#include <rte_mbuf.h> + +#define DPDK_MEMORY_MB 512 +#define DPDK_NB_MBUF 16384 +#define DPDK_MBUF_BUF_SIZE RTE_MBUF_DEFAULT_BUF_SIZE +#define DPDK_MEMPOOL_CACHE_SIZE 32 +#define DPDK_NM_RX_DESC 128 +#define DPDK_NM_TX_DESC 512 + +_ODP_STATIC_ASSERT(DPDK_NB_MBUF % DPDK_MEMPOOL_CACHE_SIZE == 0 && + DPDK_MEMPOOL_CACHE_SIZE <= RTE_MEMPOOL_CACHE_MAX_SIZE && + DPDK_MEMPOOL_CACHE_SIZE <= DPDK_MBUF_BUF_SIZE / 1.5 + , "DPDK mempool cache size failure"); +#endif + /** Packet IO using DPDK interface */ typedef struct { odp_pool_t pool; /**< pool to alloc packets from */ + struct rte_mempool *pkt_pool; /**< DPDK packet pool */ odp_pktio_capability_t capa; /**< interface capabilities */ + uint32_t data_room; /**< maximum packet length */ + /** DPDK packet pool name (pktpool_<ifname>) */ + char pool_name[IF_NAMESIZE + 8]; + uint8_t port_id; /**< DPDK port identifier */ } pkt_dpdk_t; #endif diff --git a/platform/linux-generic/pktio/dpdk.c b/platform/linux-generic/pktio/dpdk.c index ca008ed..729f241 100644 --- a/platform/linux-generic/pktio/dpdk.c +++ b/platform/linux-generic/pktio/dpdk.c @@ -8,69 +8,578 @@ #include <odp_posix_extensions.h> +#include <sched.h> +#include <ctype.h> + +#include <odp/api/cpumask.h> + #include <odp_packet_io_internal.h> #include <odp_packet_dpdk.h> #include 
<odp_debug_internal.h> #include <rte_config.h>
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_string_fns.h>
+
+/* Has dpdk_pktio_init() been called */
+static odp_bool_t dpdk_initialized;
+
+#define PMD_EXT(drv) \
+extern void devinitfn_##drv(void)
+
+PMD_EXT(cryptodev_aesni_mb_pmd_drv);
+PMD_EXT(pmd_qat_drv);
+PMD_EXT(pmd_af_packet_drv);
+PMD_EXT(rte_bnx2x_driver);
+PMD_EXT(rte_bnx2xvf_driver);
+PMD_EXT(bond_drv);
+PMD_EXT(rte_cxgbe_driver);
+PMD_EXT(em_pmd_drv);
+PMD_EXT(pmd_igb_drv);
+PMD_EXT(pmd_igbvf_drv);
+PMD_EXT(rte_enic_driver);
+PMD_EXT(rte_fm10k_driver);
+PMD_EXT(rte_i40e_driver);
+PMD_EXT(rte_i40evf_driver);
+PMD_EXT(rte_ixgbe_driver);
+PMD_EXT(rte_ixgbevf_driver);
+PMD_EXT(rte_mlx4_driver);
+PMD_EXT(rte_mlx5_driver);
+PMD_EXT(pmd_mpipe_xgbe_drv);
+PMD_EXT(pmd_mpipe_gbe_drv);
+PMD_EXT(rte_nfp_net_driver);
+PMD_EXT(pmd_null_drv);
+PMD_EXT(pmd_pcap_drv);
+PMD_EXT(pmd_ring_drv);
+PMD_EXT(pmd_szedata2_drv);
+PMD_EXT(rte_virtio_driver);
+PMD_EXT(rte_vmxnet3_driver);
+PMD_EXT(pmd_xenvirt_drv);
+
+/*
+ * This function is not called from anywhere, its only purpose is to make sure
+ * that if ODP and DPDK are statically linked to an application, the GCC
+ * constructors of the PMDs are linked as well. Otherwise the linker would omit
+ * them. It's not an issue with dynamic linking.
+ */
+void refer_constructors(void);
+void refer_constructors(void)
+{
+#ifdef RTE_LIBRTE_PMD_AESNI_MB
+	devinitfn_cryptodev_aesni_mb_pmd_drv();
+#endif
+#ifdef RTE_LIBRTE_PMD_QAT
+	devinitfn_pmd_qat_drv();
+#endif
+#ifdef RTE_LIBRTE_PMD_AF_PACKET
+	devinitfn_pmd_af_packet_drv();
+#endif
+#ifdef RTE_LIBRTE_BNX2X_PMD
+	devinitfn_rte_bnx2x_driver();
+	devinitfn_rte_bnx2xvf_driver();
+#endif
+#ifdef RTE_LIBRTE_PMD_BOND
+	devinitfn_bond_drv();
+#endif
+#ifdef RTE_LIBRTE_CXGBE_PMD
+	devinitfn_rte_cxgbe_driver();
+#endif
+#ifdef RTE_LIBRTE_EM_PMD
+	devinitfn_em_pmd_drv();
+#endif
+#ifdef RTE_LIBRTE_IGB_PMD
+	devinitfn_pmd_igb_drv();
+	devinitfn_pmd_igbvf_drv();
+#endif
+#ifdef RTE_LIBRTE_ENIC_PMD
+	devinitfn_rte_enic_driver();
+#endif
+#ifdef RTE_LIBRTE_FM10K_PMD
+	devinitfn_rte_fm10k_driver();
+#endif
+#ifdef RTE_LIBRTE_I40E_PMD
+	devinitfn_rte_i40e_driver();
+	devinitfn_rte_i40evf_driver();
+#endif
+#ifdef RTE_LIBRTE_IXGBE_PMD
+	devinitfn_rte_ixgbe_driver();
+	devinitfn_rte_ixgbevf_driver();
+#endif
+#ifdef RTE_LIBRTE_MLX4_PMD
+	devinitfn_rte_mlx4_driver();
+#endif
+#ifdef RTE_LIBRTE_MLX5_PMD
+	devinitfn_rte_mlx5_driver();
+#endif
+#ifdef RTE_LIBRTE_MPIPE_PMD
+	/* FIX: the original patch was missing the terminating semicolons on
+	 * both calls below, which is a compile error when the mpipe PMD is
+	 * enabled. */
+	devinitfn_pmd_mpipe_xgbe_drv();
+	devinitfn_pmd_mpipe_gbe_drv();
+#endif
+#ifdef RTE_LIBRTE_NFP_PMD
+	devinitfn_rte_nfp_net_driver();
+#endif
+#ifdef RTE_LIBRTE_PMD_NULL
+	devinitfn_pmd_null_drv();
+#endif
+#ifdef RTE_LIBRTE_PMD_PCAP
+	devinitfn_pmd_pcap_drv();
+#endif
+#ifdef RTE_LIBRTE_PMD_RING
+	devinitfn_pmd_ring_drv();
+#endif
+#ifdef RTE_LIBRTE_PMD_SZEDATA2
+	devinitfn_pmd_szedata2_drv();
+#endif
+#ifdef RTE_LIBRTE_VIRTIO_PMD
+	devinitfn_rte_virtio_driver();
+#endif
+#ifdef RTE_LIBRTE_VMXNET3_PMD
+	devinitfn_rte_vmxnet3_driver();
+#endif
+#ifdef RTE_LIBRTE_PMD_XENVIRT
+	devinitfn_pmd_xenvirt_drv();
+#endif
+}
+
+/* Test if s has only digits or not.
Dpdk pktio uses only digits.*/
+static int dpdk_netdev_is_valid(const char *s)
+{
+	while (*s) {
+		if (!isdigit(*s))
+			return 0;
+		s++;
+	}
+	return 1;
+}
 
 static int dpdk_close(pktio_entry_t *pktio_entry ODP_UNUSED)
 {
 	return 0;
 }
 
+static int dpdk_pktio_init(void)
+{
+	int dpdk_argc;
+	int i;
+	odp_cpumask_t mask;
+	char mask_str[ODP_CPUMASK_STR_SIZE];
+	int32_t masklen;
+	int mem_str_len;
+	int cmd_len;
+	cpu_set_t original_cpuset;
+	struct rte_config *cfg;
+
+	/**
+	 * DPDK init changes the affinity of the calling thread, so after it
+	 * returns the original affinity is restored. Only the first active
+	 * core is passed to rte_eal_init(), as the rest would be used for
+	 * DPDK's special lcore threads, which are only available through
+	 * rte_eal_[mp_]remote_launch(), but not through ODP API's.
+	 * Nevertheless, odp_local_init() makes sure for the rest of
+	 * the DPDK libraries ODP threads look like proper DPDK threads.
+	 */
+	CPU_ZERO(&original_cpuset);
+	i = pthread_getaffinity_np(pthread_self(),
+				   sizeof(original_cpuset), &original_cpuset);
+	if (i != 0) {
+		ODP_ERR("Failed to read thread affinity: %d\n", i);
+		return -1;
+	}
+
+	odp_cpumask_zero(&mask);
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (CPU_ISSET(i, &original_cpuset)) {
+			odp_cpumask_set(&mask, i);
+			break;
+		}
+	}
+	masklen = odp_cpumask_to_str(&mask, mask_str, ODP_CPUMASK_STR_SIZE);
+
+	if (masklen < 0) {
+		/* FIX: the original format string was "CPU mask error: d\n" —
+		 * the '%' of the %d conversion was missing, so the error code
+		 * was never printed. */
+		ODP_ERR("CPU mask error: %d\n", masklen);
+		return -1;
+	}
+
+	mem_str_len = snprintf(NULL, 0, "%d", DPDK_MEMORY_MB);
+
+	/* masklen includes the terminating null as well */
+	cmd_len = strlen("odpdpdk -c -m ") + masklen + mem_str_len +
+			strlen(" ");
+
+	char full_cmd[cmd_len];
+
+	/* first argument is facility log, simply bind it to odpdpdk for now.*/
+	cmd_len = snprintf(full_cmd, cmd_len, "odpdpdk -c %s -m %d",
+			   mask_str, DPDK_MEMORY_MB);
+
+	for (i = 0, dpdk_argc = 1; i < cmd_len; ++i) {
+		if (isspace(full_cmd[i]))
+			++dpdk_argc;
+	}
+
+	char *dpdk_argv[dpdk_argc];
+
+	dpdk_argc =
rte_strsplit(full_cmd, strlen(full_cmd), dpdk_argv, + dpdk_argc, ' '); + for (i = 0; i < dpdk_argc; ++i) + ODP_DBG("arg[%d]: %s\n", i, dpdk_argv[i]); + + i = rte_eal_init(dpdk_argc, dpdk_argv); + + if (i < 0) { + ODP_ERR("Cannot init the Intel DPDK EAL!\n"); + return -1; + } else if (i + 1 != dpdk_argc) { + ODP_DBG("Some DPDK args were not processed!\n"); + ODP_DBG("Passed: %d Consumed %d\n", dpdk_argc, i + 1); + } + ODP_DBG("rte_eal_init OK\n"); + + rte_set_log_level(RTE_LOG_WARNING); + + i = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), + &original_cpuset); + if (i) + ODP_ERR("Failed to reset thread affinity: %d\n", i); + + cfg = rte_eal_get_configuration(); + for (i = 0; i < RTE_MAX_LCORE; i++) + cfg->lcore_role[i] = ROLE_RTE; + + return 0; +} + +/* Placeholder for DPDK global init */ +static int odp_dpdk_pktio_init_global(void) +{ + return 0; +} + +static int odp_dpdk_pktio_init_local(void) +{ + int cpu; + + cpu = sched_getcpu(); + if (cpu < 0) { + ODP_ERR("getcpu failed\n"); + return -1; + } + + RTE_PER_LCORE(_lcore_id) = cpu; + + return 0; +} + static int dpdk_open(odp_pktio_t id ODP_UNUSED, - pktio_entry_t *pktio_entry ODP_UNUSED, - const char *netdev ODP_UNUSED, - odp_pool_t pool ODP_UNUSED) + pktio_entry_t *pktio_entry, + const char *netdev, + odp_pool_t pool) { + pkt_dpdk_t *pkt_dpdk = &pktio_entry->s.pkt_dpdk; + struct rte_mempool *pkt_pool; + odp_pool_info_t pool_info; + uint16_t data_room; + + if (getenv("ODP_PKTIO_DISABLE_DPDK")) + return -1; + + if (pool == ODP_POOL_INVALID) + return -1; + + if (!dpdk_netdev_is_valid(netdev)) { + ODP_ERR("Invalid dpdk netdev: %s\n", netdev); + return -1; + } + + /* Initialize DPDK here instead of odp_init_global() to enable running + * 'make check' without root privileges */ + if (dpdk_initialized == 0) { + dpdk_pktio_init(); + dpdk_initialized = 1; + } + + /* Init pktio entry */ + memset(pkt_dpdk, 0, sizeof(*pkt_dpdk)); + + pkt_dpdk->pool = pool; + pkt_dpdk->port_id = atoi(netdev); + + 
snprintf(pkt_dpdk->pool_name, sizeof(pkt_dpdk->pool_name), "pktpool_%s", + netdev); + + if (rte_eth_dev_count() == 0) { + ODP_ERR("No DPDK ports found\n"); + return -1; + } + + if (odp_pool_info(pool, &pool_info) < 0) { + ODP_ERR("Failed to read pool info\n"); + return -1; + } + + /* Look for previously opened packet pool */ + pkt_pool = rte_mempool_lookup(pkt_dpdk->pool_name); + if (pkt_pool == NULL) + pkt_pool = rte_pktmbuf_pool_create(pkt_dpdk->pool_name, + DPDK_NB_MBUF, + DPDK_MEMPOOL_CACHE_SIZE, 0, + DPDK_MBUF_BUF_SIZE, + rte_socket_id()); + if (pkt_pool == NULL) { + ODP_ERR("Cannot init mbuf packet pool\n"); + return -1; + } + pkt_dpdk->pkt_pool = pkt_pool; + + data_room = rte_pktmbuf_data_room_size(pkt_dpdk->pkt_pool) - + RTE_PKTMBUF_HEADROOM; + pkt_dpdk->data_room = RTE_MIN(pool_info.params.pkt.len, data_room); + return 0; } -static int dpdk_start(pktio_entry_t *pktio_entry ODP_UNUSED) +static int dpdk_start(pktio_entry_t *pktio_entry) { + pkt_dpdk_t *pkt_dpdk = &pktio_entry->s.pkt_dpdk; + uint8_t port_id = pkt_dpdk->port_id; + int ret; + unsigned i; + + struct rte_eth_conf port_conf = { + .rxmode = { + .max_rx_pkt_len = pkt_dpdk->data_room, + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .jumbo_frame = 1, /**< Jumbo Frame Support enabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, + }; + + /* DPDK doesn't support nb_rx_q/nb_tx_q being 0 */ + if (!pktio_entry->s.num_in_queue) + pktio_entry->s.num_in_queue = 1; + if (!pktio_entry->s.num_out_queue) + pktio_entry->s.num_out_queue = 1; + ret = rte_eth_dev_configure(pkt_dpdk->port_id, + pktio_entry->s.num_in_queue, + pktio_entry->s.num_out_queue, &port_conf); + if (ret < 0) { + ODP_ERR("Cannot configure device: err=%d, port=%" PRIu8 "\n", + ret, pkt_dpdk->port_id); + return -1; + } + /* Init TX queues */ + 
for (i = 0; i < pktio_entry->s.num_out_queue; i++) { + ret = rte_eth_tx_queue_setup(port_id, i, DPDK_NM_TX_DESC, + rte_eth_dev_socket_id(port_id), + NULL); + if (ret < 0) { + ODP_ERR("Queue setup failed: err=%d, port=%" PRIu8 "\n", + ret, port_id); + return -1; + } + } + /* Init RX queues */ + for (i = 0; i < pktio_entry->s.num_in_queue; i++) { + ret = rte_eth_rx_queue_setup(port_id, i, DPDK_NM_RX_DESC, + rte_eth_dev_socket_id(port_id), + NULL, pkt_dpdk->pkt_pool); + if (ret < 0) { + ODP_ERR("Queue setup failed: err=%d, port=%" PRIu8 "\n", + ret, port_id); + return -1; + } + } + /* Start device */ + ret = rte_eth_dev_start(port_id); + if (ret < 0) { + ODP_ERR("Device start failed: err=%d, port=%" PRIu8 "\n", + ret, port_id); + return -1; + } return 0; } -static int dpdk_stop(pktio_entry_t *pktio_entry ODP_UNUSED) +static int dpdk_stop(pktio_entry_t *pktio_entry) { + rte_eth_dev_stop(pktio_entry->s.pkt_dpdk.port_id); + return 0; } -static int dpdk_recv_queue(pktio_entry_t *pktio_entry ODP_UNUSED, - int index ODP_UNUSED, - odp_packet_t pkt_table[] ODP_UNUSED, - int num ODP_UNUSED) +static inline int mbuf_to_pkt(pktio_entry_t *pktio_entry, + odp_packet_t pkt_table[], + struct rte_mbuf *mbuf_table[], + uint16_t num) { - return 0; + odp_packet_t pkt; + odp_packet_hdr_t *pkt_hdr; + uint16_t pkt_len; + struct rte_mbuf *mbuf; + void *buf; + int i, j; + + for (i = 0; i < num; i++) { + mbuf = mbuf_table[i]; + if (odp_unlikely(mbuf->nb_segs != 1)) { + ODP_ERR("Segmented buffers not supported\n"); + goto fail; + } + + buf = rte_pktmbuf_mtod(mbuf, char *); + odp_prefetch(buf); + + pkt_len = rte_pktmbuf_pkt_len(mbuf); + + pkt = packet_alloc(pktio_entry->s.pkt_dpdk.pool, pkt_len, 1); + if (pkt == ODP_PACKET_INVALID) { + ODP_ERR("packet_alloc failed\n"); + goto fail; + } + + pkt_hdr = odp_packet_hdr(pkt); + + /* For now copy the data in the mbuf, + worry about zero-copy later */ + if (odp_packet_copydata_in(pkt, 0, pkt_len, buf) != 0) { + ODP_ERR("odp_packet_copydata_in 
failed\n"); + odp_packet_free(pkt); + goto fail; + } + + packet_parse_l2(pkt_hdr); + + pkt_hdr->input = pktio_entry->s.handle; + + if (mbuf->ol_flags & PKT_RX_RSS_HASH) { + pkt_hdr->has_hash = 1; + pkt_hdr->flow_hash = mbuf->hash.rss; + } + + pkt_table[i] = pkt; + + rte_pktmbuf_free(mbuf); + } + + return i; + +fail: + ODP_ERR("Creating ODP packet failed\n"); + for (j = i; j < num; j++) + rte_pktmbuf_free(mbuf_table[j]); + + return (i > 0 ? i : -1); } -static int dpdk_recv(pktio_entry_t *pktio_entry ODP_UNUSED, - odp_packet_t pkt_table[] ODP_UNUSED, - unsigned num ODP_UNUSED) +static inline int pkt_to_mbuf(pktio_entry_t *pktio_entry, + struct rte_mbuf *mbuf_table[], + odp_packet_t pkt_table[], uint16_t num) { - return 0; + pkt_dpdk_t *pkt_dpdk = &pktio_entry->s.pkt_dpdk; + int i; + char *data; + uint16_t pkt_len; + + for (i = 0; i < num; i++) { + mbuf_table[i] = rte_pktmbuf_alloc(pkt_dpdk->pkt_pool); + if (mbuf_table[i] == NULL) { + ODP_ERR("Failed to alloc mbuf\n"); + break; + } + + rte_pktmbuf_reset(mbuf_table[i]); + pkt_len = odp_packet_len(pkt_table[i]); + + data = rte_pktmbuf_append(mbuf_table[i], pkt_len); + + if (data == NULL) { + ODP_ERR("Failed to append mbuf\n"); + rte_pktmbuf_free(mbuf_table[i]); + break; + } + + odp_packet_copydata_out(pkt_table[i], 0, pkt_len, data); + } + return i; } -static int dpdk_send_queue(pktio_entry_t *pktio_entry ODP_UNUSED, - int index ODP_UNUSED, - odp_packet_t pkt_table[] ODP_UNUSED, - int num ODP_UNUSED) +static int dpdk_recv_queue(pktio_entry_t *pktio_entry, + int index, + odp_packet_t pkt_table[], + int num) { - return 0; + uint16_t nb_rx; + + struct rte_mbuf *rx_mbufs[num]; + + nb_rx = rte_eth_rx_burst(pktio_entry->s.pkt_dpdk.port_id, index, + rx_mbufs, num); + + if (nb_rx > 0) + nb_rx = mbuf_to_pkt(pktio_entry, pkt_table, rx_mbufs, nb_rx); + + return nb_rx; } -static int dpdk_send(pktio_entry_t *pktio_entry ODP_UNUSED, - odp_packet_t pkt_table[] ODP_UNUSED, - unsigned num ODP_UNUSED) +static int dpdk_recv(pktio_entry_t 
*pktio_entry, + odp_packet_t pkt_table[], + unsigned num) { - return 0; + return dpdk_recv_queue(pktio_entry, 0, pkt_table, num); +} + +static int dpdk_send_queue(pktio_entry_t *pktio_entry, + int index, + odp_packet_t pkt_table[], + int num) +{ + struct rte_mbuf *tx_mbufs[num]; + pkt_dpdk_t *pkt_dpdk = &pktio_entry->s.pkt_dpdk; + int tx_pkts; + int i; + int mbufs; + + mbufs = pkt_to_mbuf(pktio_entry, tx_mbufs, pkt_table, num); + + tx_pkts = rte_eth_tx_burst(pkt_dpdk->port_id, index, + tx_mbufs, mbufs); + + if (odp_unlikely(tx_pkts < num)) { + for (i = tx_pkts; i < mbufs; i++) + rte_pktmbuf_free(tx_mbufs[i]); + } + + odp_packet_free_multi(pkt_table, tx_pkts); + + if (odp_unlikely(tx_pkts == 0 && __odp_errno != 0)) + return -1; + + return tx_pkts; +} + +static int dpdk_send(pktio_entry_t *pktio_entry, + odp_packet_t pkt_table[], + unsigned num) +{ + return dpdk_send_queue(pktio_entry, 0, pkt_table, num); +} + +static int dpdk_mac_addr_get(pktio_entry_t *pktio_entry, void *mac_addr) +{ + rte_eth_macaddr_get(pktio_entry->s.pkt_dpdk.port_id, + (struct ether_addr *)mac_addr); + return ETH_ALEN; } const pktio_if_ops_t dpdk_pktio_ops = { .name = "dpdk", - .init_global = NULL, - .init_local = NULL, + .init_global = odp_dpdk_pktio_init_global, + .init_local = odp_dpdk_pktio_init_local, .term = NULL, .open = dpdk_open, .close = dpdk_close, @@ -84,7 +593,7 @@ const pktio_if_ops_t dpdk_pktio_ops = { .mtu_get = NULL, .promisc_mode_set = NULL, .promisc_mode_get = NULL, - .mac_get = NULL, + .mac_get = dpdk_mac_addr_get, .capability = NULL, .input_queues_config = NULL, .output_queues_config = NULL, -- 1.9.1 _______________________________________________ lng-odp mailing list lng-odp@lists.linaro.org https://lists.linaro.org/mailman/listinfo/lng-odp