Mykyta Iziumtsev(MykytaI) replied on github web page: platform/linux-generic/pktio/mdev/e1000e.c line 334 @@ -0,0 +1,605 @@ +/* Copyright (c) 2018, Linaro Limited + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "config.h" + +#ifdef ODP_MDEV + +#include <linux/types.h> +#include <protocols/eth.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <sys/mman.h> +#include <unistd.h> + +#include <odp_packet_io_internal.h> +#include <odp_posix_extensions.h> + +#include <odp/api/hints.h> +#include <odp/api/packet.h> +#include <odp/api/plat/packet_inlines.h> +#include <odp/drv/hints.h> +#include <odp/drv/mmio.h> + +#include <pktio/common.h> +#include <pktio/ethtool.h> +#include <pktio/mdev.h> +#include <pktio/sysfs.h> +#include <pktio/uapi_net_mdev.h> + +#define MODULE_NAME "e1000e" + +#define E1000E_TX_BUF_SIZE 2048U +#define E1000E_RX_BUF_SIZE 2048U + +/* RX queue definitions */ +#define E1000E_RX_QUEUE_NUM_MAX 1 + +#define E1000_RDH_OFFSET 0x02810UL +#define E1000_RDT_OFFSET 0x02818UL + +/** RX descriptor */ +typedef struct { + odp_u64le_t addr; +#define E1000E_RXD_STATUS_DONE 0x00000001UL +#define E1000E_RXD_STATUS_ERR_MASK 0xff000000UL + odp_u32le_t status; + odp_u16le_t length; + odp_u16le_t vlan; +} e1000e_rx_desc_t; + +/** RX queue data */ +typedef struct ODP_ALIGNED_CACHE { + e1000e_rx_desc_t *rx_descs; /**< RX queue base */ + odp_u32le_t *doorbell; /**< RX queue doorbell */ + + uint16_t rx_queue_len; /**< Number of RX desc entries */ + + uint16_t cidx; /**< Next RX desc to read */ + odp_u32le_t *pidx; /**< Next RX desc HW is going to write */ + + mdev_dma_area_t rx_data; /**< RX packet payload area */ + + odp_ticketlock_t lock; /**< RX queue lock */ +} e1000e_rx_queue_t; + +/* TX queue definitions */ +#define E1000E_TX_QUEUE_NUM_MAX 1 + +#define E1000_TDH_OFFSET 0x03810UL +#define E1000_TDT_OFFSET 0x03818UL + +typedef struct { + odp_u64le_t addr; /* Address of data buffer */ +#define E1000_TXD_CMD_EOP 
0x01000000 /* End of Packet */ +#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ + odp_u32le_t cmd; + odp_u32le_t reserved; +} e1000e_tx_desc_t; + +/** TX queue data */ +typedef struct ODP_ALIGNED_CACHE { + e1000e_tx_desc_t *tx_descs; /**< TX queue base */ + odp_u32le_t *doorbell; /**< TX queue doorbell */ + + uint16_t tx_queue_len; /**< Number of TX desc entries */ + + uint16_t pidx; /**< Next TX desc to write */ + odp_u32le_t *cidx; /**< Next TX desc HW is going to read */ + + mdev_dma_area_t tx_data; /**< TX packet payload area */ + + odp_ticketlock_t lock; /**< TX queue lock */ +} e1000e_tx_queue_t; + +/** Packet socket using mediated e1000e device */ +typedef struct { + /** RX queue hot data */ + e1000e_rx_queue_t rx_queues[E1000E_RX_QUEUE_NUM_MAX]; + + /** TX queue hot data */ + e1000e_tx_queue_t tx_queues[E1000E_TX_QUEUE_NUM_MAX]; + + odp_pool_t pool; /**< pool to alloc packets from */ + + odp_bool_t lockless_rx; /**< no locking for RX */ + odp_bool_t lockless_tx; /**< no locking for TX */ + + odp_pktio_capability_t capa; /**< interface capabilities */ + + uint8_t *mmio; /**< MMIO region */ + + int sockfd; /**< control socket */ + + mdev_device_t mdev; /**< Common mdev data */ +} pktio_ops_e1000e_data_t; + +static void e1000e_rx_refill(e1000e_rx_queue_t *rxq, uint16_t from, + uint16_t num); +static void e1000e_wait_link_up(pktio_entry_t *pktio_entry); +static int e1000e_close(pktio_entry_t *pktio_entry); + +static int e1000e_mmio_register(pktio_ops_e1000e_data_t *pkt_e1000e, + uint64_t offset, uint64_t size) +{ + ODP_ASSERT(pkt_e1000e->mmio == NULL); + + pkt_e1000e->mmio = mdev_region_mmap(&pkt_e1000e->mdev, offset, size); + if (pkt_e1000e->mmio == MAP_FAILED) { + ODP_ERR("Cannot mmap MMIO\n"); + return -1; + } + + ODP_DBG("Register MMIO region: 0x%llx@%016llx\n", size, offset); + + return 0; +} + +static int e1000e_rx_queue_register(pktio_ops_e1000e_data_t *pkt_e1000e, + uint64_t offset, uint64_t size) +{ + uint16_t rxq_idx = 
pkt_e1000e->capa.max_input_queues++; + e1000e_rx_queue_t *rxq = &pkt_e1000e->rx_queues[rxq_idx]; + struct ethtool_ringparam ering; + int ret; + + ODP_ASSERT(rxq_idx < ARRAY_SIZE(pkt_e1000e->rx_queues)); + + odp_ticketlock_init(&rxq->lock); + + ret = ethtool_ringparam_get_fd(pkt_e1000e->sockfd, + pkt_e1000e->mdev.if_name, &ering); + if (ret) { + ODP_ERR("Cannot get queue length\n"); + return -1; + } + rxq->rx_queue_len = ering.rx_pending; + + rxq->doorbell = + (odp_u32le_t *)(void *)(pkt_e1000e->mmio + E1000_RDT_OFFSET); + + ODP_ASSERT(rxq->rx_queue_len * sizeof(*rxq->rx_descs) <= size); + + rxq->rx_descs = mdev_region_mmap(&pkt_e1000e->mdev, offset, size); + if (rxq->rx_descs == MAP_FAILED) { + ODP_ERR("Cannot mmap RX queue\n"); + return -1; + } + + rxq->pidx = + (odp_u32le_t *)(void *)(pkt_e1000e->mmio + E1000_RDH_OFFSET); + + rxq->rx_data.size = rxq->rx_queue_len * E1000E_RX_BUF_SIZE; + ret = mdev_dma_area_alloc(&pkt_e1000e->mdev, &rxq->rx_data); + if (ret) { + ODP_ERR("Cannot allocate RX queue DMA area\n"); + return -1; + } + + /* Need 1 desc gap to keep tail from touching head */ + e1000e_rx_refill(rxq, 0, rxq->rx_queue_len - 1); + + ODP_DBG("Register RX queue region: 0x%llx@%016llx\n", size, offset); + ODP_DBG(" RX descriptors: %u\n", rxq->rx_queue_len); + + return 0; +} + +static int e1000e_tx_queue_register(pktio_ops_e1000e_data_t *pkt_e1000e, + uint64_t offset, uint64_t size) +{ + uint16_t txq_idx = pkt_e1000e->capa.max_output_queues++; + e1000e_tx_queue_t *txq = &pkt_e1000e->tx_queues[txq_idx]; + struct ethtool_ringparam ering; + int ret; + + ODP_ASSERT(txq_idx < ARRAY_SIZE(pkt_e1000e->tx_queues)); + + odp_ticketlock_init(&txq->lock); + + ret = ethtool_ringparam_get_fd(pkt_e1000e->sockfd, + pkt_e1000e->mdev.if_name, &ering); + if (ret) { + ODP_ERR("Cannot get queue length\n"); + return -1; + } + txq->tx_queue_len = ering.tx_pending; + + txq->doorbell = + (odp_u32le_t *)(void *)(pkt_e1000e->mmio + E1000_TDT_OFFSET); + + ODP_ASSERT(txq->tx_queue_len * 
sizeof(*txq->tx_descs) <= size); + + txq->tx_descs = mdev_region_mmap(&pkt_e1000e->mdev, offset, size); + if (txq->tx_descs == MAP_FAILED) { + ODP_ERR("Cannot mmap TX queue\n"); + return -1; + } + + txq->cidx = + (odp_u32le_t *)(void *)(pkt_e1000e->mmio + E1000_TDH_OFFSET); + + txq->tx_data.size = txq->tx_queue_len * E1000E_TX_BUF_SIZE; + ret = mdev_dma_area_alloc(&pkt_e1000e->mdev, &txq->tx_data); + if (ret) { + ODP_ERR("Cannot allocate TX queue DMA area\n"); + return -1; + } + + ODP_DBG("Register TX queue region: 0x%llx@%016llx\n", size, offset); + ODP_DBG(" TX descriptors: %u\n", txq->tx_queue_len); + + return 0; +} + +static int e1000e_region_info_cb(mdev_device_t *mdev, + struct vfio_region_info *region_info) +{ + pktio_ops_e1000e_data_t *pkt_e1000e = + odp_container_of(mdev, pktio_ops_e1000e_data_t, mdev); + mdev_region_class_t class_info; + + if (vfio_get_region_cap_type(region_info, &class_info) < 0) { + ODP_ERR("Cannot find class_info in region %u\n", + region_info->index); + return -1; + } + + switch (class_info.type) { + case VFIO_NET_MDEV_MMIO: + return e1000e_mmio_register(pkt_e1000e, + region_info->offset, + region_info->size); + + case VFIO_NET_MDEV_RX_RING: + return e1000e_rx_queue_register(pkt_e1000e, + region_info->offset, + region_info->size); + + case VFIO_NET_MDEV_TX_RING: + return e1000e_tx_queue_register(pkt_e1000e, + region_info->offset, + region_info->size); + + default: + ODP_ERR("Unexpected region %u (class %u:%u)\n", + region_info->index, class_info.type, + class_info.subtype); + return -1; + } +} + +static int e1000e_open(odp_pktio_t id ODP_UNUSED, + pktio_entry_t *pktio_entry, + const char *resource, odp_pool_t pool) +{ + pktio_ops_e1000e_data_t *pkt_e1000e; + int ret; + + ODP_ASSERT(pool != ODP_POOL_INVALID); + + if (strncmp(resource, NET_MDEV_PREFIX, strlen(NET_MDEV_PREFIX))) + return -1; + + ODP_DBG("%s: probing resource %s\n", MODULE_NAME, resource); + + pkt_e1000e = ODP_OPS_DATA_ALLOC(sizeof(*pkt_e1000e)); + if 
(odp_unlikely(pkt_e1000e == NULL)) { + ODP_ERR("Failed to allocate pktio_ops_e1000e_data_t struct"); + return -1; + } + pktio_entry->s.ops_data = pkt_e1000e; + + memset(pkt_e1000e, 0, sizeof(*pkt_e1000e)); + + pkt_e1000e->sockfd = socket(AF_INET, SOCK_DGRAM, 0); + if (pkt_e1000e->sockfd == -1) { + ODP_ERR("Cannot get device control socket\n"); + goto out; + } + + ret = + mdev_device_create(&pkt_e1000e->mdev, MODULE_NAME, + resource + strlen(NET_MDEV_PREFIX), + e1000e_region_info_cb); + if (ret) + goto out; + + pkt_e1000e->pool = pool; + + e1000e_wait_link_up(pktio_entry); + + ODP_DBG("%s: open %s is successful\n", MODULE_NAME, + pkt_e1000e->mdev.if_name); + + return 0; + +out: + e1000e_close(pktio_entry); + return -1; +} + +static int e1000e_close(pktio_entry_t *pktio_entry) +{ + pktio_ops_e1000e_data_t *pkt_e1000e = pktio_entry->s.ops_data; + + ODP_DBG("%s: close %s\n", MODULE_NAME, pkt_e1000e->mdev.if_name); + + mdev_device_destroy(&pkt_e1000e->mdev); + + for (uint16_t i = 0; i < pkt_e1000e->capa.max_input_queues; i++) {
Comment: Done. > Mykyta Iziumtsev(MykytaI) wrote: > Looking at include/config.h.in (where ODP_MDEV comes from) it's not the only > naming violation. There are at least 7 other pktio (internal) macros which > don't start with underscore. Please confirm that we really need to add > underscore for ODP_MDEV and I'll update the next patch version with that ... >> Bill Fischofer(Bill-Fischofer-Linaro) wrote: >> @muvarov In this case the variable is being declared at the beginning of >> scope. The scope in this instance is the block defined by the `for` loop. >> That citation is saying you shouldn't declare variables in the middle of a >> scope. While that's still correct C, it can be confusing. >>> muvarov wrote: >>> ./doc/application-api-guide/api_guide_lines.dox line 162 >>>> Bill Fischofer(Bill-Fischofer-Linaro) wrote: >>>> @muvarov This idiom is part of C99, so I see no problem with it here. It's >>>> also why checkpatch doesn't complain. >>>>> Bill Fischofer(Bill-Fischofer-Linaro) wrote: >>>>> We have the various `odpdrv_mmio_uxx_read/write()` APIs. Should we also >>>>> have `_addr` variants that would similarly enable address loading to >>>>> handle this sort of awkward casting? E.g.: >>>>> ``` >>>>> rxq->doorbell = odpdrv_u32le_addr(pkt_e1000e->mmio + E1000_RDT_OFFSET); >>>>> ``` >>>>> Similar comments for other address setups contained here. >>>>>> Bill Fischofer(Bill-Fischofer-Linaro) wrote: >>>>>> Might be good to document where the various "magic numbers" come from. >>>>>> Presumably from some hardware manual? Does Intel provide a `.h` file >>>>>> that we can reference/`#include` for these rather than duplicate them >>>>>> here? >>>>>>> Bill Fischofer(Bill-Fischofer-Linaro) wrote: >>>>>>> Agree with @muvarov. `_ODP_MDEV` is fine here. The `ODP` prefix should >>>>>>> be reserved for things that are part of the external ODP API. >>>>>>>> muvarov wrote: >>>>>>>> Place the type declaration outside of for(). checkpatch should warn about that. 
>>>>>>>>> muvarov wrote: >>>>>>>>> Please keep the ODP_ and odp_ prefixes only for the ODP API. Internal things >>>>>>>>> should be named with _ODP_ or without that prefix. This comment is not >>>>>>>>> just for this commit, but it has to be corrected. https://github.com/Linaro/odp/pull/408#discussion_r163485788 updated_at 2018-01-24 08:59:25