On 12.07.2016 12:11, Ciara Loftus wrote:
> This commit introduces support for DPDK 16.07 and consequently breaks
> compatibility with DPDK 16.04.
>
> DPDK 16.07 introduces some changes to various APIs. These have been
> updated in OVS, including:
> * xstats API: changes to structure of xstats
> * vhost API: replace virtio-net references with 'vid'
>
> Signed-off-by: Ciara Loftus <[email protected]>
> ---
> .travis/linux-build.sh | 2 +-
> INSTALL.DPDK-ADVANCED.md | 8 +-
> INSTALL.DPDK.md | 20 ++--
> lib/netdev-dpdk.c | 243
> +++++++++++++++++++++++------------------------
> 4 files changed, 135 insertions(+), 138 deletions(-)
>
> diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh
> index 065de39..1b3d43d 100755
> --- a/.travis/linux-build.sh
> +++ b/.travis/linux-build.sh
> @@ -68,7 +68,7 @@ fi
>
> if [ "$DPDK" ]; then
> if [ -z "$DPDK_VER" ]; then
> - DPDK_VER="16.04"
> + DPDK_VER="16.07"
> fi
> install_dpdk $DPDK_VER
> if [ "$CC" = "clang" ]; then
> diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md
> index 9ae536d..ec1de29 100644
> --- a/INSTALL.DPDK-ADVANCED.md
> +++ b/INSTALL.DPDK-ADVANCED.md
> @@ -43,7 +43,7 @@ for DPDK and OVS.
> For IVSHMEM case, set `export DPDK_TARGET=x86_64-ivshmem-linuxapp-gcc`
>
> ```
> - export DPDK_DIR=/usr/src/dpdk-16.04
> + export DPDK_DIR=/usr/src/dpdk-16.07
> export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET
> make install T=$DPDK_TARGET DESTDIR=install
> ```
> @@ -339,7 +339,7 @@ For users wanting to do packet forwarding using kernel
> stack below are the steps
> cd /usr/src/cmdline_generator
> wget
> https://raw.githubusercontent.com/netgroup-polito/un-orchestrator/master/orchestrator/compute_controller/plugins/kvm-libvirt/cmdline_generator/cmdline_generator.c
> wget
> https://raw.githubusercontent.com/netgroup-polito/un-orchestrator/master/orchestrator/compute_controller/plugins/kvm-libvirt/cmdline_generator/Makefile
> - export RTE_SDK=/usr/src/dpdk-16.04
> + export RTE_SDK=/usr/src/dpdk-16.07
> export RTE_TARGET=x86_64-ivshmem-linuxapp-gcc
> make
> ./build/cmdline_generator -m -p dpdkr0 XXX
> @@ -363,7 +363,7 @@ For users wanting to do packet forwarding using kernel
> stack below are the steps
> mount -t hugetlbfs nodev /dev/hugepages (if not already mounted)
>
> # Build the DPDK ring application in the VM
> - export RTE_SDK=/root/dpdk-16.04
> + export RTE_SDK=/root/dpdk-16.07
> export RTE_TARGET=x86_64-ivshmem-linuxapp-gcc
> make
>
> @@ -374,7 +374,7 @@ For users wanting to do packet forwarding using kernel
> stack below are the steps
>
> ## <a name="vhost"></a> 6. Vhost Walkthrough
>
> -DPDK 16.04 supports two types of vhost:
> +DPDK 16.07 supports two types of vhost:
>
> 1. vhost-user - enabled default
>
> diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
> index 5407794..9022ad8 100644
> --- a/INSTALL.DPDK.md
> +++ b/INSTALL.DPDK.md
> @@ -21,7 +21,7 @@ The DPDK support of Open vSwitch is considered
> 'experimental'.
>
> ### Prerequisites
>
> -* Required: DPDK 16.04, libnuma
> +* Required: DPDK 16.07, libnuma
> * Hardware: [DPDK Supported NICs] when physical ports in use
>
> ## <a name="build"></a> 2. Building and Installation
> @@ -42,10 +42,10 @@ advanced install guide [INSTALL.DPDK-ADVANCED.md]
>
> ```
> cd /usr/src/
> - wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.04.zip
> - unzip dpdk-16.04.zip
> + wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.07.zip
> + unzip dpdk-16.07.zip
>
> - export DPDK_DIR=/usr/src/dpdk-16.04
> + export DPDK_DIR=/usr/src/dpdk-16.07
> cd $DPDK_DIR
> ```
>
> @@ -329,9 +329,9 @@ can be found in [Vhost Walkthrough].
>
> ```
> cd /root/dpdk/
> - wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.04.zip
> - unzip dpdk-16.04.zip
> - export DPDK_DIR=/root/dpdk/dpdk-16.04
> + wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.07.zip
> + unzip dpdk-16.07.zip
> + export DPDK_DIR=/root/dpdk/dpdk-16.07
> export DPDK_TARGET=x86_64-native-linuxapp-gcc
> export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET
> cd $DPDK_DIR
> @@ -487,7 +487,7 @@ can be found in [Vhost Walkthrough].
> </disk>
> <disk type='dir' device='disk'>
> <driver name='qemu' type='fat'/>
> - <source dir='/usr/src/dpdk-16.04'/>
> + <source dir='/usr/src/dpdk-16.07'/>
> <target dev='vdb' bus='virtio'/>
> <readonly/>
> </disk>
> @@ -557,9 +557,9 @@ can be found in [Vhost Walkthrough].
> DPDK. It is recommended that users update Network Interface firmware to
> match what has been validated for the DPDK release.
>
> - For DPDK 16.04, the list of validated firmware versions can be found at:
> + For DPDK 16.07, the list of validated firmware versions can be found at:
>
> - http://dpdk.org/doc/guides/rel_notes/release_16_04.html
> + http://dpdk.org/doc/guides/rel_notes/release_16.07.html
>
>
> Bug Reporting:
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 85b18fd..9cf0b0c 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -356,8 +356,8 @@ struct netdev_dpdk {
> * always true. */
> bool txq_needs_locking;
>
> - /* virtio-net structure for vhost device */
> - OVSRCU_TYPE(struct virtio_net *) virtio_dev;
> + /* virtio identifier for vhost devices */
> + int vid;
>
> /* Identifier used to distinguish vhost devices from each other */
> char vhost_id[PATH_MAX];
> @@ -393,8 +393,6 @@ static bool dpdk_thread_is_pmd(void);
>
> static int netdev_dpdk_construct(struct netdev *);
>
> -struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk *dev);
> -
> struct ingress_policer *
> netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev);
>
> @@ -749,6 +747,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int
> port_no,
> dev->flags = 0;
> dev->mtu = ETHER_MTU;
> dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
> + dev->vid = -1;
>
> buf_size = dpdk_buf_size(dev->mtu);
> dev->dpdk_mp = dpdk_mp_get(dev->socket_id, FRAME_LEN_TO_MTU(buf_size));
> @@ -846,6 +845,7 @@ netdev_dpdk_vhost_user_construct(struct netdev *netdev)
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> const char *name = netdev->name;
> int err;
> + uint64_t flags = 0;
>
> /* 'name' is appended to 'vhost_sock_dir' and used to create a socket in
> * the file system. '/' or '\' would traverse directories, so they're not
> @@ -868,7 +868,7 @@ netdev_dpdk_vhost_user_construct(struct netdev *netdev)
> snprintf(dev->vhost_id, sizeof(dev->vhost_id), "%s/%s",
> vhost_sock_dir, name);
>
> - err = rte_vhost_driver_register(dev->vhost_id);
> + err = rte_vhost_driver_register(dev->vhost_id, flags);
> if (err) {
> VLOG_ERR("vhost-user socket device setup failure for socket %s\n",
> dev->vhost_id);
> @@ -923,13 +923,19 @@ netdev_dpdk_destruct(struct netdev *netdev)
> ovs_mutex_unlock(&dpdk_mutex);
> }
>
> +static bool
> +is_vhost_running(int vid)
> +{
> + return vid >= 0;
> +}
> +
> static void
> netdev_dpdk_vhost_destruct(struct netdev *netdev)
> {
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>
> /* Guest becomes an orphan if still attached. */
> - if (netdev_dpdk_get_virtio(dev) != NULL) {
> + if (is_vhost_running(dev->vid)) {
> VLOG_ERR("Removing port '%s' while vhost device still attached.",
> netdev->name);
> VLOG_ERR("To restore connectivity after re-adding of port, VM on
> socket"
> @@ -1143,12 +1149,6 @@ ingress_policer_run(struct ingress_policer *policer,
> struct rte_mbuf **pkts,
> return cnt;
> }
>
> -static bool
> -is_vhost_running(struct virtio_net *virtio_dev)
> -{
> - return (virtio_dev != NULL && (virtio_dev->flags & VIRTIO_DEV_RUNNING));
> -}
> -
> static inline void
> netdev_dpdk_vhost_update_rx_size_counters(struct netdev_stats *stats,
> unsigned int packet_size)
> @@ -1218,18 +1218,17 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> struct dp_packet **packets, int *c)
> {
> struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
> - struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> int qid = rxq->queue_id;
> struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
> uint16_t nb_rx = 0;
> uint16_t dropped = 0;
>
> - if (OVS_UNLIKELY(!is_vhost_running(virtio_dev)
> + if (OVS_UNLIKELY(!is_vhost_running(dev->vid)
> || !(dev->flags & NETDEV_UP))) {
> return EAGAIN;
> }
>
> - nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid * VIRTIO_QNUM +
> VIRTIO_TXQ,
> + nb_rx = rte_vhost_dequeue_burst(dev->vid, qid * VIRTIO_QNUM + VIRTIO_TXQ,
> dev->dpdk_mp->mp,
> (struct rte_mbuf **)packets,
> NETDEV_MAX_BURST);
> @@ -1326,7 +1325,6 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
> bool may_steal)
> {
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> - struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
> unsigned int total_pkts = cnt;
> unsigned int qos_pkts = cnt;
> @@ -1334,7 +1332,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
>
> qid = dev->tx_q[qid % netdev->n_txq].map;
>
> - if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) || qid < 0
> + if (OVS_UNLIKELY(!is_vhost_running(dev->vid) || qid < 0
> || !(dev->flags & NETDEV_UP))) {
> rte_spinlock_lock(&dev->stats_lock);
> dev->stats.tx_dropped+= cnt;
> @@ -1352,7 +1350,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
> int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
> unsigned int tx_pkts;
>
> - tx_pkts = rte_vhost_enqueue_burst(virtio_dev, vhost_qid,
> + tx_pkts = rte_vhost_enqueue_burst(dev->vid, vhost_qid,
> cur_pkts, cnt);
> if (OVS_LIKELY(tx_pkts)) {
> /* Packets have been sent.*/
> @@ -1707,60 +1705,50 @@ netdev_dpdk_vhost_get_stats(const struct netdev
> *netdev,
>
> static void
> netdev_dpdk_convert_xstats(struct netdev_stats *stats,
> - const struct rte_eth_xstats *xstats,
> + const struct rte_eth_xstat *xstats,
> + const struct rte_eth_xstat_name *names,
> const unsigned int size)
> {
> - /* XXX Current implementation is simple search through an array
> - * to find hardcoded counter names. In future DPDK release (TBD)
> - * XSTATS API will change so each counter will be represented by
> - * unique ID instead of String. */
> -
> for (unsigned int i = 0; i < size; i++) {
> - if (strcmp(XSTAT_RX_64_PACKETS, xstats[i].name) == 0) {
> + if (strcmp(XSTAT_RX_64_PACKETS, names[i].name) == 0) {
> stats->rx_1_to_64_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_RX_65_TO_127_PACKETS, xstats[i].name) == 0) {
> + } else if (strcmp(XSTAT_RX_65_TO_127_PACKETS, names[i].name) == 0) {
> stats->rx_65_to_127_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_RX_128_TO_255_PACKETS, xstats[i].name) == 0)
> {
> + } else if (strcmp(XSTAT_RX_128_TO_255_PACKETS, names[i].name) == 0) {
> stats->rx_128_to_255_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_RX_256_TO_511_PACKETS, xstats[i].name) == 0)
> {
> + } else if (strcmp(XSTAT_RX_256_TO_511_PACKETS, names[i].name) == 0) {
> stats->rx_256_to_511_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_RX_512_TO_1023_PACKETS,
> - xstats[i].name) == 0) {
> + } else if (strcmp(XSTAT_RX_512_TO_1023_PACKETS, names[i].name) == 0)
> {
> stats->rx_512_to_1023_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_RX_1024_TO_1522_PACKETS,
> - xstats[i].name) == 0) {
> + } else if (strcmp(XSTAT_RX_1024_TO_1522_PACKETS, names[i].name) ==
> 0) {
> stats->rx_1024_to_1522_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_RX_1523_TO_MAX_PACKETS,
> - xstats[i].name) == 0) {
> + } else if (strcmp(XSTAT_RX_1523_TO_MAX_PACKETS, names[i].name) == 0)
> {
> stats->rx_1523_to_max_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_TX_64_PACKETS, xstats[i].name) == 0) {
> + } else if (strcmp(XSTAT_TX_64_PACKETS, names[i].name) == 0) {
> stats->tx_1_to_64_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_TX_65_TO_127_PACKETS, xstats[i].name) == 0) {
> + } else if (strcmp(XSTAT_TX_65_TO_127_PACKETS, names[i].name) == 0) {
> stats->tx_65_to_127_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_TX_128_TO_255_PACKETS, xstats[i].name) == 0)
> {
> + } else if (strcmp(XSTAT_TX_128_TO_255_PACKETS, names[i].name) == 0) {
> stats->tx_128_to_255_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_TX_256_TO_511_PACKETS, xstats[i].name) == 0)
> {
> + } else if (strcmp(XSTAT_TX_256_TO_511_PACKETS, names[i].name) == 0) {
> stats->tx_256_to_511_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_TX_512_TO_1023_PACKETS,
> - xstats[i].name) == 0) {
> + } else if (strcmp(XSTAT_TX_512_TO_1023_PACKETS, names[i].name) == 0)
> {
> stats->tx_512_to_1023_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_TX_1024_TO_1522_PACKETS,
> - xstats[i].name) == 0) {
> + } else if (strcmp(XSTAT_TX_1024_TO_1522_PACKETS, names[i].name) ==
> 0) {
> stats->tx_1024_to_1522_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_TX_1523_TO_MAX_PACKETS,
> - xstats[i].name) == 0) {
> + } else if (strcmp(XSTAT_TX_1523_TO_MAX_PACKETS, names[i].name) == 0)
> {
> stats->tx_1523_to_max_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_TX_MULTICAST_PACKETS, xstats[i].name) == 0) {
> + } else if (strcmp(XSTAT_TX_MULTICAST_PACKETS, names[i].name) == 0) {
> stats->tx_multicast_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_RX_BROADCAST_PACKETS, xstats[i].name) == 0) {
> + } else if (strcmp(XSTAT_RX_BROADCAST_PACKETS, names[i].name) == 0) {
> stats->rx_broadcast_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_TX_BROADCAST_PACKETS, xstats[i].name) == 0) {
> + } else if (strcmp(XSTAT_TX_BROADCAST_PACKETS, names[i].name) == 0) {
> stats->tx_broadcast_packets = xstats[i].value;
> - } else if (strcmp(XSTAT_RX_UNDERSIZED_ERRORS, xstats[i].name) == 0) {
> + } else if (strcmp(XSTAT_RX_UNDERSIZED_ERRORS, names[i].name) == 0) {
> stats->rx_undersized_errors = xstats[i].value;
> - } else if (strcmp(XSTAT_RX_FRAGMENTED_ERRORS, xstats[i].name) == 0) {
> + } else if (strcmp(XSTAT_RX_FRAGMENTED_ERRORS, names[i].name) == 0) {
> stats->rx_fragmented_errors = xstats[i].value;
> - } else if (strcmp(XSTAT_RX_JABBER_ERRORS, xstats[i].name) == 0) {
> + } else if (strcmp(XSTAT_RX_JABBER_ERRORS, names[i].name) == 0) {
> stats->rx_jabber_errors = xstats[i].value;
> }
> }
> @@ -1776,7 +1764,8 @@ netdev_dpdk_get_stats(const struct netdev *netdev,
> struct netdev_stats *stats)
> netdev_dpdk_get_carrier(netdev, &gg);
> ovs_mutex_lock(&dev->mutex);
>
> - struct rte_eth_xstats *rte_xstats;
> + struct rte_eth_xstat *rte_xstats;
> + struct rte_eth_xstat_name *rte_xstats_names;
> int rte_xstats_len, rte_xstats_ret;
>
> if (rte_eth_stats_get(dev->port_id, &rte_stats)) {
> @@ -1785,20 +1774,51 @@ netdev_dpdk_get_stats(const struct netdev *netdev,
> struct netdev_stats *stats)
> return EPROTO;
> }
>
> - rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);
> - if (rte_xstats_len > 0) {
> - rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats) * rte_xstats_len);
> - memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);
> - rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,
> - rte_xstats_len);
> - if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {
> - netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_ret);
> - }
> + /* Get length of statistics */
> + rte_xstats_len = rte_eth_xstats_get_names(dev->port_id, NULL, 0);
> + if (rte_xstats_len < 0) {
> + VLOG_WARN("Cannot get XSTATS values for port: %i", dev->port_id);
> + goto out;
> + }
> + /* Reserve memory for xstats names */
> + rte_xstats_names = dpdk_rte_mzalloc(sizeof(*rte_xstats_names)
> + * rte_xstats_len);
> + if (rte_xstats_names == NULL) {
> + VLOG_WARN("Cannot allocate memory for XSTATS names for port %i",
> + dev->port_id);
> + rte_free(rte_xstats_names);
> + goto out;
> + }
> + /* Reserve memory for xstats values */
> + rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats) * rte_xstats_len);
> + if (rte_xstats == NULL) {
> + VLOG_WARN("Cannot allocate memory for XSTATS values for port %i",
> + dev->port_id);
> rte_free(rte_xstats);
> + goto out;
> + }
> + /* Retreive xstats names */
> + if (rte_xstats_len != rte_eth_xstats_get_names(dev->port_id,
> + rte_xstats_names, rte_xstats_len)) {
> + VLOG_WARN("Cannot get XSTATS names for port: %i.", dev->port_id);
> + rte_free(rte_xstats);
> + rte_free(rte_xstats_names);
> + goto out;
> + }
> + /* Retreive xstats values */
> + memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);
> + rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,
> + rte_xstats_len);
> + if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {
> + netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_names,
> + rte_xstats_len);
> } else {
> - VLOG_WARN("Can't get XSTATS counters for port: %i.", dev->port_id);
> + VLOG_WARN("Cannot get XSTATS values for port: %i.", dev->port_id);
> + rte_free(rte_xstats);
> + rte_free(rte_xstats_names);
> }
>
> +out:
> stats->rx_packets = rte_stats.ipackets;
> stats->tx_packets = rte_stats.opackets;
> stats->rx_bytes = rte_stats.ibytes;
> @@ -1806,7 +1826,6 @@ netdev_dpdk_get_stats(const struct netdev *netdev,
> struct netdev_stats *stats)
> /* DPDK counts imissed as errors, but count them here as dropped instead
> */
> stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;
> stats->tx_errors = rte_stats.oerrors;
> - stats->multicast = rte_stats.imcasts;
>
> rte_spinlock_lock(&dev->stats_lock);
> stats->tx_dropped = dev->stats.tx_dropped;
> @@ -1973,11 +1992,10 @@ static int
> netdev_dpdk_vhost_get_carrier(const struct netdev *netdev, bool *carrier)
> {
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> - struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
>
> ovs_mutex_lock(&dev->mutex);
>
> - if (is_vhost_running(virtio_dev)) {
> + if (is_vhost_running(dev->vid)) {
> *carrier = 1;
> } else {
> *carrier = 0;
> @@ -2045,10 +2063,9 @@ netdev_dpdk_update_flags__(struct netdev_dpdk *dev,
> /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed and vhost is
> * running then change netdev's change_seq to trigger link state
> * update. */
> - struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
>
> if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))
> - && is_vhost_running(virtio_dev)) {
> + && is_vhost_running(dev->vid)){
> netdev_change_seq_changed(&dev->up);
>
> /* Clear statistics if device is getting up. */
> @@ -2176,15 +2193,15 @@ netdev_dpdk_set_admin_state(struct unixctl_conn
> *conn, int argc,
> * Set virtqueue flags so that we do not receive interrupts.
> */
> static void
> -set_irq_status(struct virtio_net *virtio_dev)
> +set_irq_status(int vid)
> {
> uint32_t i;
> uint64_t idx;
>
> - for (i = 0; i < virtio_dev->virt_qp_nb; i++) {
> + for (i = 0; i < rte_vhost_get_queue_num(vid); i++) {
> idx = i * VIRTIO_QNUM;
> - rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_RXQ, 0);
> - rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_TXQ, 0);
> + rte_vhost_enable_guest_notification(vid, idx + VIRTIO_RXQ, 0);
> + rte_vhost_enable_guest_notification(vid, idx + VIRTIO_TXQ, 0);
> }
> }
>
> @@ -2233,26 +2250,27 @@ netdev_dpdk_remap_txqs(struct netdev_dpdk *dev)
> * A new virtio-net device is added to a vhost port.
> */
> static int
> -new_device(struct virtio_net *virtio_dev)
> +new_device(int vid)
> {
> struct netdev_dpdk *dev;
> bool exists = false;
> int newnode = 0;
> - long err = 0;
> + char ifname[PATH_MAX];
> +
> + rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
>
> ovs_mutex_lock(&dpdk_mutex);
> /* Add device to the vhost port with the same name as that passed down.
> */
> LIST_FOR_EACH(dev, list_node, &dpdk_list) {
> - if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
> - uint32_t qp_num = virtio_dev->virt_qp_nb;
> + if (strncmp(ifname, dev->vhost_id, PATH_MAX) == 0) {
> + uint32_t qp_num = rte_vhost_get_queue_num(vid);
>
> ovs_mutex_lock(&dev->mutex);
> /* Get NUMA information */
> - err = get_mempolicy(&newnode, NULL, 0, virtio_dev,
> - MPOL_F_NODE | MPOL_F_ADDR);
> - if (err) {
> - VLOG_INFO("Error getting NUMA info for vHost Device '%s'",
> - virtio_dev->ifname);
> + newnode = rte_vhost_get_numa_node(vid);
> + if ((newnode != -1) && (newnode != dev->socket_id)) {
> + dev->requested_socket_id = newnode;
> + } else {
> newnode = dev->socket_id;
> }
'requested_socket_id' will be updated on the next line.
How about just this:
/* Get NUMA information */
newnode = rte_vhost_get_numa_node(vid);
if (newnode == -1) {
newnode = dev->socket_id;
}
>
> @@ -2261,11 +2279,11 @@ new_device(struct virtio_net *virtio_dev)
> dev->requested_n_txq = qp_num;
> netdev_request_reconfigure(&dev->up);
>
> - ovsrcu_set(&dev->virtio_dev, virtio_dev);
> + dev->vid = vid;
> exists = true;
>
> /* Disable notifications. */
> - set_irq_status(virtio_dev);
> + set_irq_status(dev->vid);
> netdev_change_seq_changed(&dev->up);
> ovs_mutex_unlock(&dev->mutex);
> break;
> @@ -2274,14 +2292,14 @@ new_device(struct virtio_net *virtio_dev)
> ovs_mutex_unlock(&dpdk_mutex);
>
> if (!exists) {
> - VLOG_INFO("vHost Device '%s' %"PRIu64" can't be added - name not "
> - "found", virtio_dev->ifname, virtio_dev->device_fh);
> + VLOG_INFO("vHost Device '%s' can't be added - name not found",
> ifname);
>
> return -1;
> }
>
> - VLOG_INFO("vHost Device '%s' %"PRIu64" has been added on numa node %i",
> - virtio_dev->ifname, virtio_dev->device_fh, newnode);
> + VLOG_INFO("vHost Device '%s' has been added on numa node %i",
> + ifname, newnode);
> +
> return 0;
> }
>
> @@ -2304,18 +2322,20 @@ netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev)
> * the device.
> */
> static void
> -destroy_device(volatile struct virtio_net *virtio_dev)
> +destroy_device(int vid)
> {
> struct netdev_dpdk *dev;
> bool exists = false;
> + char ifname[PATH_MAX];
> +
> + rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
>
> ovs_mutex_lock(&dpdk_mutex);
> LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> - if (netdev_dpdk_get_virtio(dev) == virtio_dev) {
> + if (dev->vid == vid) {
>
> ovs_mutex_lock(&dev->mutex);
> - virtio_dev->flags &= ~VIRTIO_DEV_RUNNING;
> - ovsrcu_set(&dev->virtio_dev, NULL);
> + dev->vid = -1;
> /* Clear tx/rx queue settings. */
> netdev_dpdk_txq_map_clear(dev);
> dev->requested_n_rxq = NR_QUEUE;
> @@ -2332,31 +2352,21 @@ destroy_device(volatile struct virtio_net *virtio_dev)
> ovs_mutex_unlock(&dpdk_mutex);
>
> if (exists == true) {
> - /*
> - * Wait for other threads to quiesce after setting the 'virtio_dev'
> - * to NULL, before returning.
> - */
> - ovsrcu_synchronize();
> - /*
> - * As call to ovsrcu_synchronize() will end the quiescent state,
> - * put thread back into quiescent state before returning.
> - */
> - ovsrcu_quiesce_start();
> - VLOG_INFO("vHost Device '%s' %"PRIu64" has been removed",
> - virtio_dev->ifname, virtio_dev->device_fh);
> + VLOG_INFO("vHost Device '%s' has been removed", ifname);
'ovsrcu_synchronize()' here was used to be sure that all PMD threads are
stopped their rx/tx operations on this vhost device. Without it there will
be race between freeing of vhost device inside DPDK and rx/tx operations on it.
You should not remove this block of code. Comments, I guess, should be fixed
somehow.
> } else {
> - VLOG_INFO("vHost Device '%s' %"PRIu64" not found",
> virtio_dev->ifname,
> - virtio_dev->device_fh);
> + VLOG_INFO("vHost Device '%s' not found", ifname);
> }
> }
>
> static int
> -vring_state_changed(struct virtio_net *virtio_dev, uint16_t queue_id,
> - int enable)
> +vring_state_changed(int vid, uint16_t queue_id, int enable)
> {
> struct netdev_dpdk *dev;
> bool exists = false;
> int qid = queue_id / VIRTIO_QNUM;
> + char ifname[PATH_MAX];
> +
> + rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
>
> if (queue_id % VIRTIO_QNUM == VIRTIO_TXQ) {
> return 0;
> @@ -2364,7 +2374,7 @@ vring_state_changed(struct virtio_net *virtio_dev,
> uint16_t queue_id,
>
> ovs_mutex_lock(&dpdk_mutex);
> LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> - if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
> + if (strncmp(ifname, dev->vhost_id, PATH_MAX) == 0) {
> ovs_mutex_lock(&dev->mutex);
> if (enable) {
> dev->tx_q[qid].map = qid;
> @@ -2380,25 +2390,17 @@ vring_state_changed(struct virtio_net *virtio_dev,
> uint16_t queue_id,
> ovs_mutex_unlock(&dpdk_mutex);
>
> if (exists) {
> - VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s' %"
> - PRIu64" changed to \'%s\'", queue_id, qid,
> - virtio_dev->ifname, virtio_dev->device_fh,
> + VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s'"
> + "changed to \'%s\'", queue_id, qid, ifname,
> (enable == 1) ? "enabled" : "disabled");
> } else {
> - VLOG_INFO("vHost Device '%s' %"PRIu64" not found",
> virtio_dev->ifname,
> - virtio_dev->device_fh);
> + VLOG_INFO("vHost Device '%s' not found", ifname);
> return -1;
> }
>
> return 0;
> }
>
> -struct virtio_net *
> -netdev_dpdk_get_virtio(const struct netdev_dpdk *dev)
> -{
> - return ovsrcu_get(struct virtio_net *, &dev->virtio_dev);
> -}
> -
> struct ingress_policer *
> netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev)
> {
> @@ -2848,7 +2850,6 @@ static int
> netdev_dpdk_vhost_user_reconfigure(struct netdev *netdev)
> {
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> - struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> int err = 0;
>
> ovs_mutex_lock(&dpdk_mutex);
> @@ -2874,10 +2875,6 @@ netdev_dpdk_vhost_user_reconfigure(struct netdev
> *netdev)
> }
> }
>
> - if (virtio_dev) {
> - virtio_dev->flags |= VIRTIO_DEV_RUNNING;
> - }
> -
This part of code was moved here from 'new_device()' to postpone using
of this vhost device by PMD threads until first reconfiguration.
(Possible issues: wrong numa node, only 1 queue enabled.)
Maybe we can use another flag or 'vid' itself for the purpose of postponing?
> ovs_mutex_unlock(&dev->mutex);
> ovs_mutex_unlock(&dpdk_mutex);
>
> @@ -3342,7 +3339,7 @@ dpdk_init__(const struct smap *ovs_other_config)
> /* Register CUSE device to handle IOCTLs.
> * Unless otherwise specified, cuse_dev_name is set to vhost-net.
> */
> - err = rte_vhost_driver_register(cuse_dev_name);
> + err = rte_vhost_driver_register(cuse_dev_name, 0);
>
> if (err != 0) {
> VLOG_ERR("CUSE device setup failure.");
>
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev