Re: [ovs-dev] [PATCH RFC v6 2/2] netdev-dpdk: Add vHost User PMD
[snip] > > + > > +static int > > netdev_dpdk_vhost_construct(struct netdev *netdev) > > { > > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > > @@ -904,7 +1051,7 @@ netdev_dpdk_vhost_construct(struct netdev > *netdev) > > /* 'name' is appended to 'vhost_sock_dir' and used to create a socket > > in > > * the file system. '/' or '\' would traverse directories, so they're > > not > > * acceptable in 'name'. */ > > -if (strchr(name, '/') || strchr(name, '\\')) { > > +if (strchr(name, '/') || strchr(name, '\\') || strchr(name, ',')) { > > VLOG_ERR("\"%s\" is not a valid name for a vhost-user port. " > > "A valid name must not include '/' or '\\'", > > name); > > @@ -917,18 +1064,26 @@ netdev_dpdk_vhost_construct(struct netdev > *netdev) > > */ > > snprintf(dev->vhost_id, sizeof dev->vhost_id, "%s/%s", > > dpdk_get_vhost_sock_dir(), name); > > +dev->port_id = -1; > > > > -dev->vhost_driver_flags &= ~RTE_VHOST_USER_CLIENT; > > -err = rte_vhost_driver_register(dev->vhost_id, dev- > >vhost_driver_flags); > > -if (err) { > > -VLOG_ERR("vhost-user socket device setup failure for socket %s\n", > > - dev->vhost_id); > > -} else { > > +err = dpdk_attach_vhost_pmd(dev, 0); > > + > > +if (!err) { > > fatal_signal_add_file_to_unlink(dev->vhost_id); > > VLOG_INFO("Socket %s created for vhost-user port %s\n", > >dev->vhost_id, name); > > } > > -err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST); > > +err = netdev_dpdk_init(netdev, dev->port_id, DPDK_DEV_VHOST); > > + > > +if (err) { > > I think, that callbacks are not registered at this point in case of failure. > Anyway, IMHO, 'netdev_dpdk_init' should be responsible for unregistering. 
Ok > > > +rte_eth_dev_callback_unregister(dev->port_id, > > +RTE_ETH_EVENT_QUEUE_STATE, > > +vring_state_changed_callback, > > NULL); > > +rte_eth_dev_callback_unregister(dev->port_id, > > +RTE_ETH_EVENT_INTR_LSC, > > +link_status_changed_callback, > > NULL); > > +rte_eth_dev_detach(dev->port_id, dev->vhost_id); > > +} > > > > ovs_mutex_unlock(&dpdk_mutex); > > return err; > > @@ -940,7 +1095,7 @@ netdev_dpdk_vhost_client_construct(struct > netdev *netdev) > > int err; > > > > ovs_mutex_lock(&dpdk_mutex); > > -err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST); > > +err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST_CLIENT); > > ovs_mutex_unlock(&dpdk_mutex); > > return err; > > } > > @@ -964,13 +1119,10 @@ netdev_dpdk_construct(struct netdev *netdev) > > } > > > > static void > > -netdev_dpdk_destruct(struct netdev *netdev) > > +dpdk_destruct_helper(struct netdev_dpdk *dev) > > +OVS_REQUIRES(dpdk_mutex) > > +OVS_REQUIRES(dev->mutex) > > { > > -struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > > - > > -ovs_mutex_lock(&dpdk_mutex); > > -ovs_mutex_lock(&dev->mutex); > > - > > rte_eth_dev_stop(dev->port_id); > > free(ovsrcu_get_protected(struct ingress_policer *, > >&dev->ingress_policer)); > > @@ -978,61 +1130,59 @@ netdev_dpdk_destruct(struct netdev *netdev) > > rte_free(dev->tx_q); > > ovs_list_remove(&dev->list_node); > > dpdk_mp_put(dev->dpdk_mp); > > - > > -ovs_mutex_unlock(&dev->mutex); > > -ovs_mutex_unlock(&dpdk_mutex); > > } > > > > -/* rte_vhost_driver_unregister() can call back destroy_device(), which will > > - * try to acquire 'dpdk_mutex' and possibly 'dev->mutex'. To avoid a > > - * deadlock, none of the mutexes must be held while calling this function. 
> */ > > -static int > > -dpdk_vhost_driver_unregister(struct netdev_dpdk *dev OVS_UNUSED, > > - char *vhost_id) > > -OVS_EXCLUDED(dpdk_mutex) > > -OVS_EXCLUDED(dev->mutex) > > +static void > > +netdev_dpdk_destruct(struct netdev *netdev) > > { > > -return rte_vhost_driver_unregister(vhost_id); > > +struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > > + > > +ovs_mutex_lock(&dpdk_mutex); > > +ovs_mutex_lock(&dev->mutex); > > + > > +dpdk_destruct_helper(dev); > > + > > +ovs_mutex_unlock(&dev->mutex); > > +ovs_mutex_unlock(&dpdk_mutex); > > } > > > > static void > > netdev_dpdk_vhost_destruct(struct netdev *netdev) > > { > > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > > -char *vhost_id; > > > > ovs_mutex_lock(&dpdk_mutex); > > ovs_mutex_lock(&dev->mutex); > > > > -/* Guest becomes an orphan if still attached. */ > > -if (netdev_dpdk_get_vid(dev) >= 0 > > -&& !(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) { > > +check_link_status(dev); > > +if (dev->link.link_status == ETH_
Re: [ovs-dev] [PATCH RFC 2/2] netdev-dpdk: Add vHost User PMD
> > > I'll post few comments to v4 here. > > > > > static int > > > +dpdk_attach_vhost_pmd(struct netdev_dpdk *dev, int mode) > > > +{ > > > +char *devargs; > > > +int err = 0; > > > +uint8_t port_no = 0; > > > +uint32_t driver_id = -1; > > > + > > > +if (id_pool_alloc_id(dpdk_get_vhost_id_pool(), &driver_id)) { > > > +devargs = xasprintf("net_vhost%u,iface=%s,queues=%i,client=%i", > > > + driver_id, dev->vhost_id, > > > + MIN(OVS_VHOST_MAX_QUEUE_NUM, > > RTE_MAX_QUEUES_PER_PORT), > > > + mode); > > > +err = rte_eth_dev_attach(devargs, &port_no); > > > +if (!err) { > > > +dev->port_id = port_no; > > > +dev->vhost_pmd_id = driver_id; > > > +} else { > > > > id should be freed on error. > > Fixed in v5 > > > > > > +VLOG_ERR("Failed to attach vhost-user device %s to DPDK", > > > + dev->vhost_id); > > > +} > > > +} else { > > > +VLOG_ERR("Unable to create vhost-user device %s - too many > vhost- > > user" > > > + "devices registered with PMD", dev->vhost_id); > > > +err = ENODEV; > > > +} > > > + > > > +return err; > > > +} > > > > -- > > > > > static void > > > netdev_dpdk_vhost_destruct(struct netdev *netdev) > > > { > > > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > > > -char *vhost_id; > > > > > > ovs_mutex_lock(&dpdk_mutex); > > > ovs_mutex_lock(&dev->mutex); > > > > > > -/* Guest becomes an orphan if still attached. */ > > > -if (netdev_dpdk_get_vid(dev) >= 0 > > > -&& !(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) { > > > -VLOG_ERR("Removing port '%s' while vhost device still attached.", > > > - netdev->name); > > > -VLOG_ERR("To restore connectivity after re-adding of port, VM on > > > " > > > - "socket '%s' must be restarted.", dev->vhost_id); > > > > These log messages are useful. I think it's better to keep them somehow. > > Maybe we can check for link status here? 
> > Sure > > > > > > +if (rte_eth_dev_detach(dev->port_id, dev->vhost_id)) { > > > > 'rte_eth_dev_detach()' will call 'dpdk_vhost_driver_unregister()' and > > this will lead to link status change if vhost still attached. > > And as soon as 'dpdk_mutex' and 'dev->mutex' are taken, there will be > > deadlock > > inside the callback. > > > > See 3f891bbea61d ("netdev-dpdk: Fix deadlock in destroy_device().") for > > details. > > > > The problem here is that we can't call 'rte_eth_dev_detach()' without 'dev- > > >mutex'. > > Ok got it - I'm posting a v5 without this fix. Expect it in the v6. Not sure > how to > approach it just yet. We might be ok here actually. The LSC callback to OVS where we try to acquire the mutex a second time will not occur after rte_eth_dev_detach(). We will fail in DPDK before then: DPDK: rte_eth_vhost.c: static void destroy_device(int vid) { . rte_vhost_get_ifname(vid, ifname, sizeof(ifname)); list = find_internal_resource(ifname); <--- port was removed from list during detach if (list == NULL) { RTE_LOG(ERR, PMD, "Invalid interface name: %s\n", ifname); return; <--- we fail here } . callback_to_ovs_lsc(); <--- we never reach this } Thanks, Ciara > > > > > > +VLOG_ERR("Error removing vhost device %s", dev->vhost_id); > > > +} else { > > > +if (dev->type == DPDK_DEV_VHOST) { > > > > "} else if {" ? > > > > > +fatal_signal_remove_file_to_unlink(dev->vhost_id); > > > +} > > > } > > > +id_pool_free_id(dpdk_get_vhost_id_pool(), dev->vhost_pmd_id); > > > > I guess, that It's better to call 'free()' only if id was allocated. > > I will introduce a check in v5. 
> > > > > > > > > -free(ovsrcu_get_protected(struct ingress_policer *, > > > - &dev->ingress_policer)); > > > - > > > -rte_free(dev->tx_q); > > > -ovs_list_remove(&dev->list_node); > > > -dpdk_mp_put(dev->dpdk_mp); > > > - > > > -vhost_id = xstrdup(dev->vhost_id); > > > +dpdk_destruct_helper(dev); > > > > > > ovs_mutex_unlock(&dev->mutex); > > > ovs_mutex_unlock(&dpdk_mutex); > > > - > > > -if (dpdk_vhost_driver_unregister(dev, vhost_id)) { > > > -VLOG_ERR("%s: Unable to unregister vhost driver for socket > '%s'.\n", > > > - netdev->name, vhost_id); > > > -} else if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) { > > > -/* OVS server mode - remove this socket from list for deletion */ > > > -fatal_signal_remove_file_to_unlink(vhost_id); > > > -} > > > -free(vhost_id); > > > } > > > > Best regards, Ilya Maximets. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/d
Re: [ovs-dev] [PATCH] netdev-dpdk: Return rx/tx queue sizes only for ETH devices.
> > 'dev->requested_{rxq,txq}_size' and 'dev->{rxq,txq}_size' are > relevant only for DPDK_DEV_ETH devices and should be skipped > in 'netdev_dpdk_get_config()' for other ports. > > CC: Ciara Loftus > Fixes: b685696b8c81 ("netdev-dpdk: Allow configurable queue sizes for > 'dpdk' ports") Consider this Acked. I implemented the fix already in the vHost PMD RFC: http://openvswitch.org/pipermail/dev/2016-October/080652.html Better I guess to get it in earlier. Thanks, Ciara > > Signed-off-by: Ilya Maximets > --- > lib/netdev-dpdk.c | 17 +++-- > 1 file changed, 11 insertions(+), 6 deletions(-) > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 7c1523e..b0beb37 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -1054,13 +1054,18 @@ netdev_dpdk_get_config(const struct netdev > *netdev, struct smap *args) > smap_add_format(args, "configured_rx_queues", "%d", netdev->n_rxq); > smap_add_format(args, "requested_tx_queues", "%d", dev- > >requested_n_txq); > smap_add_format(args, "configured_tx_queues", "%d", netdev->n_txq); > -smap_add_format(args, "requested_rxq_descriptors", "%d", > -dev->requested_rxq_size); > -smap_add_format(args, "configured_rxq_descriptors", "%d", dev- > >rxq_size); > -smap_add_format(args, "requested_txq_descriptors", "%d", > -dev->requested_txq_size); > -smap_add_format(args, "configured_txq_descriptors", "%d", dev- > >txq_size); > smap_add_format(args, "mtu", "%d", dev->mtu); > + > +if (dev->type == DPDK_DEV_ETH) { > +smap_add_format(args, "requested_rxq_descriptors", "%d", > +dev->requested_rxq_size); > +smap_add_format(args, "configured_rxq_descriptors", "%d", > +dev->rxq_size); > +smap_add_format(args, "requested_txq_descriptors", "%d", > +dev->requested_txq_size); > +smap_add_format(args, "configured_txq_descriptors", "%d", > +dev->txq_size); > +} > ovs_mutex_unlock(&dev->mutex); > > return 0; > -- > 2.7.4 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH RFC 2/2] netdev-dpdk: Add vHost User PMD
> I'll post few comments to v4 here. > > > static int > > +dpdk_attach_vhost_pmd(struct netdev_dpdk *dev, int mode) > > +{ > > +char *devargs; > > +int err = 0; > > +uint8_t port_no = 0; > > +uint32_t driver_id = -1; > > + > > +if (id_pool_alloc_id(dpdk_get_vhost_id_pool(), &driver_id)) { > > +devargs = xasprintf("net_vhost%u,iface=%s,queues=%i,client=%i", > > + driver_id, dev->vhost_id, > > + MIN(OVS_VHOST_MAX_QUEUE_NUM, > RTE_MAX_QUEUES_PER_PORT), > > + mode); > > +err = rte_eth_dev_attach(devargs, &port_no); > > +if (!err) { > > +dev->port_id = port_no; > > +dev->vhost_pmd_id = driver_id; > > +} else { > > id should be freed on error. Fixed in v5 > > > +VLOG_ERR("Failed to attach vhost-user device %s to DPDK", > > + dev->vhost_id); > > +} > > +} else { > > +VLOG_ERR("Unable to create vhost-user device %s - too many vhost- > user" > > + "devices registered with PMD", dev->vhost_id); > > +err = ENODEV; > > +} > > + > > +return err; > > +} > > -- > > > static void > > netdev_dpdk_vhost_destruct(struct netdev *netdev) > > { > > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > > -char *vhost_id; > > > > ovs_mutex_lock(&dpdk_mutex); > > ovs_mutex_lock(&dev->mutex); > > > > -/* Guest becomes an orphan if still attached. */ > > -if (netdev_dpdk_get_vid(dev) >= 0 > > -&& !(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) { > > -VLOG_ERR("Removing port '%s' while vhost device still attached.", > > - netdev->name); > > -VLOG_ERR("To restore connectivity after re-adding of port, VM on " > > - "socket '%s' must be restarted.", dev->vhost_id); > > These log messages are useful. I think it's better to keep them somehow. > Maybe we can check for link status here? Sure > > > +if (rte_eth_dev_detach(dev->port_id, dev->vhost_id)) { > > 'rte_eth_dev_detach()' will call 'dpdk_vhost_driver_unregister()' and > this will lead to link status change if vhost still attached. > And as soon as 'dpdk_mutex' and 'dev->mutex' are taken, there will be > deadlock > inside the callback. 
> > See 3f891bbea61d ("netdev-dpdk: Fix deadlock in destroy_device().") for > details. > > The problem here is that we can't call 'rte_eth_dev_detach()' without 'dev- > >mutex'. Ok got it - I'm posting a v5 without this fix. Expect it in the v6. Not sure how to approach it just yet. > > > +VLOG_ERR("Error removing vhost device %s", dev->vhost_id); > > +} else { > > +if (dev->type == DPDK_DEV_VHOST) { > > "} else if {" ? > > > +fatal_signal_remove_file_to_unlink(dev->vhost_id); > > +} > > } > > +id_pool_free_id(dpdk_get_vhost_id_pool(), dev->vhost_pmd_id); > > I guess, that It's better to call 'free()' only if id was allocated. I will introduce a check in v5. > > > > > -free(ovsrcu_get_protected(struct ingress_policer *, > > - &dev->ingress_policer)); > > - > > -rte_free(dev->tx_q); > > -ovs_list_remove(&dev->list_node); > > -dpdk_mp_put(dev->dpdk_mp); > > - > > -vhost_id = xstrdup(dev->vhost_id); > > +dpdk_destruct_helper(dev); > > > > ovs_mutex_unlock(&dev->mutex); > > ovs_mutex_unlock(&dpdk_mutex); > > - > > -if (dpdk_vhost_driver_unregister(dev, vhost_id)) { > > -VLOG_ERR("%s: Unable to unregister vhost driver for socket > > '%s'.\n", > > - netdev->name, vhost_id); > > -} else if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) { > > -/* OVS server mode - remove this socket from list for deletion */ > > -fatal_signal_remove_file_to_unlink(vhost_id); > > -} > > -free(vhost_id); > > } > > Best regards, Ilya Maximets. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH RFC 2/2] netdev-dpdk: Add vHost User PMD
> > On 20.10.2016 19:33, Loftus, Ciara wrote: > >>>> @@ -894,6 +1006,66 @@ dpdk_dev_parse_name(const char > >> dev_name[], const char prefix[], > >>>> } > >>>> } > >>>> > >>>> +/* When attaching a vhost device to DPDK, a unique name of the > format > >>>> + * 'net_vhostX' is expected, where X is a unique identifier. > >>>> + * get_vhost_drv_id returns a valid X value to provide to DPDK. > >>>> + */ > >>>> +static int > >>>> +get_vhost_drv_id(void) > >>>> +{ > >>>> +int i = 0; > >>>> + > >>>> +for (i = 0; i < RTE_MAX_ETHPORTS; i++) { > >>>> +if (vhost_drv_ids[i] == 0) { > >>>> +return i; > >>>> +} > >>>> +} > >>>> + > >>>> +return -1; > >>>> +} > >>>> + > >>>> +static void > >>>> +set_vhost_drv_id(int id, int val) > >>>> +{ > >>>> +vhost_drv_ids[id] = val; > >>>> +} > >>> > >>> I think we can just increment a global atomic counter instead of the above > >> solution. > >>> Numbers just need to be unique. The driver never uses them for anything. > > > > But that way we would be limiting the number of vHost ports to the max > value of the int we use as the counter. > > In that case I recommend using the existing 'id-pool' implementation instead. I wasn't aware of the id-pool implementation. It's ideal for this case. I've added it in the v4. Thanks! > > Best regards, Ilya Maximets. > > P.S.: Could you, please, add me to the CC list for the future versions of > this patch set? I just can't find it in patchwork. Apologies - I missed you on the next version. You can find it here: http://openvswitch.org/pipermail/dev/2016-October/080893.html I'll add your CC from now onwards. Not sure why they are not showing up in patchwork. Thanks, Ciara ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH RFC 2/2] netdev-dpdk: Add vHost User PMD
> > Sorry, CC mail-list > > On 20.10.2016 17:20, Ilya Maximets wrote: > > Not a complete review. > > Few comments inline. Thanks for the review. Comments inline. Thanks, Ciara > > > > Best regards, Ilya Maximets. > > > > On 14.10.2016 17:08, Ciara Loftus wrote: > >> The vHost PMD allows vHost User ports to be controlled by the > >> librte_ether API, like physical 'dpdk' ports and IVSHM 'dpdkr' ports. > >> This commit integrates this PMD into OVS and removes direct calls to the > >> librte_vhost DPDK library. > >> > >> This commit requires DPDK v16.11 functionality that isn't available in > >> previous releases, and thus breaks compatibility with such releases. > >> > >> Signed-off-by: Ciara Loftus > >> --- > >> INSTALL.DPDK.md | 10 + > >> NEWS |2 + > >> lib/netdev-dpdk.c | 1101 +- > --- > >> 3 files changed, 447 insertions(+), 666 deletions(-) > >> > >> diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > >> index 2f0ae9a..5678e41 100644 > >> --- a/INSTALL.DPDK.md > >> +++ b/INSTALL.DPDK.md > >> @@ -603,6 +603,16 @@ can be found in [Vhost Walkthrough]. > >> > >> http://dpdk.org/doc/guides/rel_notes/release_16.11.html > >> > >> + - dpdk, dpdkr and dpdkvhostuser ports are 'eth' type ports in the > context of > >> +DPDK as they are all managed by the rte_ether API. This means that > they > >> +adhere to the DPDK configuration option > CONFIG_RTE_MAX_ETHPORTS which by > >> +default is set to 32. This means by default the combined total number > of > >> +dpdk, dpdkr and dpdkvhostuser ports allowable in OVS with DPDK is > 32. This > >> +value can be changed if desired by modifying the configuration file in > >> +DPDK, or by overriding the default value on the command line when > building > >> +DPDK. eg. > >> + > >> +`make install CONFIG_RTE_MAX_ETHPORTS=64` > >> > >> Bug Reporting: > >> -- > >> diff --git a/NEWS b/NEWS > >> index ab74fcd..6e47683 100644 > >> --- a/NEWS > >> +++ b/NEWS > >> @@ -135,6 +135,8 @@ v2.6.0 - 27 Sep 2016 > >> * Remove dpdkvhostcuse port type. 
> >> * OVS client mode for vHost and vHost reconnect (Requires QEMU 2.7) > >> * 'dpdkvhostuserclient' port type. > >> + * vHost PMD integration brings vhost-user ports under control of the > >> + rte_ether DPDK API. > >> - Increase number of registers to 16. > >> - ovs-benchmark: This utility has been removed due to lack of use and > >> bitrot. > >> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > >> index 7c1523e..d0b80a7 100644 > >> --- a/lib/netdev-dpdk.c > >> +++ b/lib/netdev-dpdk.c > >> @@ -27,6 +27,7 @@ > >> #include > >> #include > >> #include > >> +#include > >> #include > >> #include > >> #include > >> @@ -122,6 +123,7 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / > ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF)) > >> #define XSTAT_RX_BROADCAST_PACKETS "rx_broadcast_packets" > >> #define XSTAT_TX_BROADCAST_PACKETS "tx_broadcast_packets" > >> #define XSTAT_RX_UNDERSIZED_ERRORS "rx_undersized_errors" > >> +#define XSTAT_RX_UNDERSIZE_PACKETS "rx_undersize_packets" > >> #define XSTAT_RX_OVERSIZE_ERRORS "rx_oversize_errors" > >> #define XSTAT_RX_FRAGMENTED_ERRORS "rx_fragmented_errors" > >> #define XSTAT_RX_JABBER_ERRORS "rx_jabber_errors" > >> @@ -143,6 +145,9 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / > ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF)) > >> #define VHOST_ENQ_RETRY_NUM 8 > >> #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) > >> > >> +/* Array that tracks the used & unused vHost user driver IDs */ > >> +static unsigned int vhost_drv_ids[RTE_MAX_ETHPORTS]; > >> + > >> static const struct rte_eth_conf port_conf = { > >> .rxmode = { > >> .mq_mode = ETH_MQ_RX_RSS, > >> @@ -343,15 +348,12 @@ struct netdev_dpdk { > >> struct rte_eth_link link; > >> int link_reset_cnt; > >> > >> -/* virtio identifier for vhost devices */ > >> -ovsrcu_index vid; > >> - > >> -/* True if vHost device is 'up' and has been reconfigured at least > >> once > */ > >> -bool vhost_reconfigured; > >> - > >> /* Identifier used to distinguish vhost devices from each other. 
*/ > >> char vhost_id[PATH_MAX]; > >> > >> +/* ID of vhost user port given to the PMD driver */ > >> +unsigned int vhost_pmd_id; > >> + > >> /* In dpdk_list. */ > >> struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex); > >> > >> @@ -392,16 +394,25 @@ struct netdev_rxq_dpdk { > >> }; > >> > >> static int netdev_dpdk_construct(struct netdev *); > >> - > >> -int netdev_dpdk_get_vid(const struct netdev_dpdk *dev); > >> +static int netdev_dpdk_vhost_construct(struct netdev *); > >> +static int netdev_dpdk_vhost_client_construct(struct netdev *); > >> > >> struct ingress_policer * > >> netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev); > >> > >> +static void link_status_changed_callback(ui
Re: [ovs-dev] OVS DPDK build errors with commit 01961bb
> > Hi > I found that OVS DPDK fails to build with DPDK 16.07 after commit > 01961bb. I am not sure if this is because of some expectations about > DPDK configuration or problems in the code. I could fix this fairly > easily with 4 additions to lib/dpdk.c (patch below), but am unsure > about whether this is what was intended. Hi, Apologies, I just submitted a very similar patch. I hadn't seen yours! I think the pdump include should be conditional, depending on whether DPDK_PDUMP is detected. It can also be taken out of netdev-dpdk.c as it's no longer required there. For my build I didn't require adding the ring and mempool includes to this file. Do you have additional DPDK config options set that require these headers to be included here? Thanks, Ciara > > Panda > > > > diff --git a/lib/dpdk.c b/lib/dpdk.c > index caea0f4..e998b65 100644 > --- a/lib/dpdk.c > +++ b/lib/dpdk.c > @@ -22,12 +22,16 @@ > #include > > #include > +#include > +#include > +#include > > #include "dirs.h" > #include "netdev-dpdk.h" > #include "openvswitch/dynamic-string.h" > #include "openvswitch/vlog.h" > #include "smap.h" > +#include "fatal-signal.h" > > VLOG_DEFINE_THIS_MODULE(dpdk); > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH v4] netdev-dpdk: Allow configurable queue sizes for 'dpdk' ports
> > 2016-09-29 3:28 GMT-07:00 Loftus, Ciara : > > > > Hi Ciara, > > thanks for the patch, it looks good to me. > > I only have a minor comment: > > I'd like the requested values to depend only on the current database > > state. With the current patch when a value is invalid (not pow2 or bigger > > than 4096) we keep the previous one. > > Could you change dpdk_process_queue_size() to return a default value > > (which can be passed as an argument) when the value from the database is > > absent or not valid? > > I guess dpdk_process_queue_size() could return this value directly, instead > > of returning it through a pointer. > > Hi Daniele, > > Thanks for the review. Can you please clarify your request. > What do you suggest we assign the return value of process_queues() to? If > requested_size is to reflect the DB value then I assume not that. > The validity checks seem pointless in process_queue_size if we are doing to > set the requested value regardless. If requested_size reflects the DB value I > see two options: > 1. Do the pow2 and size checks in reconfigure, before assigning dev- > >xq_sizes, and only assign if valid. > 2. Similar to n_rxq, try set up the queue and if it fails, fall back on a > known > good (previous) value. This removes the pow2 etc checks. > > Let me know your opinion, or another option if you have it. > > Sorry, I wasn't clear enough. > The checks look good to me, I was thinking about what to do if the checks > fail. > > Considering the following scenario: > ovs-vsctl set int dpdk0 options:n_rxq_desc=1024 > ovs-vsctl set int dpdk0 options:n_rxq_desc=3000 #Invalid value. > With your patch, after the second ovs-vsctl, the effective rxq_desc will be > 1024 (the previous value). In my opinion it should be > NIC_PORT_DEFAULT_RXQ_SIZE, like it's specified in the documentation: > > "If not specified or an incorrect value is specified, 2048 rx descriptors > will be > used by default." > How about something like this? 
(I realized that it's probably easier to use > the > pointer and return void, please disregard my previous suggestion) Thanks for the clarification, makes sense. I've sent a v5 - let me know if I misinterpreted your request. Thanks, Ciara > > static void > dpdk_process_queue_size(struct netdev *netdev, const struct smap *args, > const char *flag, int default, > int *new_size) > { > int queue_size = smap_get_int(args, flag, default); > if (/* queue size is invalid */) { > queue_size = default; > } > if (queue_size != *new_size) { > reconfigure(); > } > } > > netdev_dpdk_set_config() > { > /*... */ > dpdk_process_queue_size(/* */, NIC_PORT_DEFAULT_RXQ_SIZE, &dev- > >requested_rxq_size); > dpdk_process_queue_size(/* */, NIC_PORT_DEFAULT_TXQ_SIZE, &dev- > >requested_txq_size); > /*... */ > } > Thanks, > Daniele ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH v4] netdev-dpdk: Allow configurable queue sizes for 'dpdk' ports
> > Hi Ciara, > thanks for the patch, it looks good to me. > I only have a minor comment: > I'd like the requested values to depend only on the current database > state. With the current patch when a value is invalid (not pow2 or bigger > than 4096) we keep the previous one. > Could you change dpdk_process_queue_size() to return a default value > (which can be passed as an argument) when the value from the database is > absent or not valid? > I guess dpdk_process_queue_size() could return this value directly, instead > of returning it through a pointer. Hi Daniele, Thanks for the review. Can you please clarify your request? What do you suggest we assign the return value of dpdk_process_queue_size() to? If requested_size is to reflect the DB value then I assume not that. The validity checks seem pointless in dpdk_process_queue_size() if we are going to set the requested value regardless. If requested_size reflects the DB value I see two options: 1. Do the pow2 and size checks in reconfigure, before assigning dev->xq_sizes, and only assign if valid. 2. Similar to n_rxq, try to set up the queue and if it fails, fall back on a known good (previous) value. This removes the pow2 etc checks. Let me know your opinion, or another option if you have it. Thanks, Ciara > > > 2016-09-22 7:25 GMT-07:00 Ilya Maximets : > I didn't test that, but it looks good to me. > > Acked-by: Ilya Maximets > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH v3] netdev-dpdk: Allow configurable queue sizes for 'dpdk' ports
> > Few comments inline. Thanks for the feedback Ilya. > > > The 'options:n_rxq_desc' and 'n_txq_desc' fields allow the number of rx > > and tx descriptors for dpdk ports to be modified. By default the values > > are set to 2048, but can be modified to an integer between 1 and 4096 > > that is a power of two. The values can be modified at runtime, however > > require the NIC to restart when changed. > > > > Signed-off-by: Ciara Loftus > > > > --- > > v3: > > * Make queue sizes per-port rather than global > > * Check if queue size is power of 2 - fail if not. > > > > v2: > > * Rebase > > > > INSTALL.DPDK-ADVANCED.md | 16 ++-- > > NEWS | 2 ++ > > lib/netdev-dpdk.c| 48 > +++- > > vswitchd/vswitch.xml | 22 ++ > > 4 files changed, 81 insertions(+), 7 deletions(-) > > > > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md > > index d7b9873..488e84f 100644 > > --- a/INSTALL.DPDK-ADVANCED.md > > +++ b/INSTALL.DPDK-ADVANCED.md > > @@ -257,7 +257,19 @@ needs to be affinitized accordingly. > >The rx queues are assigned to pmd threads on the same NUMA node in a > >round-robin fashion. > > > > -### 4.4 Exact Match Cache > > +### 4.4 DPDK Physical Port Queue Sizes > > + `ovs-vsctl set Interface dpdk0 options:n_rxq_desc=` > > + `ovs-vsctl set Interface dpdk0 options:n_txq_desc=` > > + > > + The command above sets the number of rx/tx descriptors that the NIC > > + associated with dpdk0 will be initialised with. > > + > > + Different 'n_rxq_desc' and 'n_txq_desc' configurations yield different > > + benefits in terms of throughput and latency for different scenarios. > > + Generally, smaller queue sizes can have a positive impact for latency at > the > > + expense of throughput. The opposite is often true for larger queue sizes. > > Here we can mention that increasing the number of rx descriptors may lead > to performance degradation because of using non-vectorized rx functions. > At least this is true for i40e and maybe ixgbe dpdk drivers. 
Setting > 'n_rxq_desc=4096' for them will lead to disabling of vectorized rx. It seems the same applies for ixgbe (IXGBE_MAX_RING_DESC=4096) http://dpdk.org/doc/guides/nics/ixgbe.html#rx-constraints I will include this info in the next version. > > > + > > +### 4.5 Exact Match Cache > > > >Each pmd thread contains one EMC. After initial flow setup in the > >datapath, the EMC contains a single table and provides the lowest level > > @@ -274,7 +286,7 @@ needs to be affinitized accordingly. > >avoiding datapath classifier lookups is to have multiple pmd threads > >running. This can be done as described in section 4.2. > > > > -### 4.5 Rx Mergeable buffers > > +### 4.6 Rx Mergeable buffers > > > >Rx Mergeable buffers is a virtio feature that allows chaining of multiple > >virtio descriptors to handle large packet sizes. As such, large packets > > diff --git a/NEWS b/NEWS > > index 21ab538..901886d 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -125,6 +125,8 @@ v2.6.0 - xx xxx > > * Remove dpdkvhostcuse port type. > > * OVS client mode for vHost and vHost reconnect (Requires QEMU 2.7) > > * 'dpdkvhostuserclient' port type. > > + * New option 'n_rxq_desc' and 'n_txq_desc' fields for DPDK interfaces > > + which set the number of rx and tx descriptors to use for the given > port. > > - Increase number of registers to 16. > > - ovs-benchmark: This utility has been removed due to lack of use and > > bitrot. 
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index 89bdc4d..228993f 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -132,8 +132,9 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / > ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF)) > > > > #define SOCKET0 0 > > > > -#define NIC_PORT_RX_Q_SIZE 2048 /* Size of Physical NIC RX Queue, > Max (n+32<=4096)*/ > > -#define NIC_PORT_TX_Q_SIZE 2048 /* Size of Physical NIC TX Queue, > Max (n+32<=4096)*/ > > +#define NIC_PORT_DEFAULT_RXQ_SIZE 2048 /* Default size of Physical > NIC RXQ */ > > +#define NIC_PORT_DEFAULT_TXQ_SIZE 2048 /* Default size of Physical > NIC TXQ */ > > +#define NIC_PORT_MAX_Q_SIZE 4096 /* Maximum size of Physical > NIC Queue */ > > > > #define OVS_VHOST_MAX_QUEUE_NUM 1024 /* Maximum number of > vHost TX queues. */ > > #define OVS_VHOST_QUEUE_MAP_UNKNOWN (-1) /* Mapping not > initialized. */ > > @@ -372,6 +373,12 @@ struct netdev_dpdk { > > int requested_mtu; > > int requested_n_txq; > > int requested_n_rxq; > > +int requested_rxq_size; > > +int requested_txq_size; > > + > > +/* Number of rx/tx descriptors for physical devices */ > > +int rxq_size; > > +int txq_size; > > > > /* Socket ID detected when vHost device is brought up */ > > int requested_socket_id; > > @@ -646,7 +653,7 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk > *dev,
Re: [ovs-dev] [PATCH v2] netdev-dpdk: Allow configurable queue sizes for 'dpdk' ports
> > Hi, Ciara. > I also wanted to do something like this. But, IMHO, this configuration > should be per port because different HW behaves differently and has > different capabilities. > > What do you think? > > For the implementation: I think we should check that the value is a power of 2 > because it's required at least by some dpdk drivers (i40e for example). Thanks for the feedback Ilya. These are good suggestions. I will rework the patch to include these changes and resubmit soon. Thanks, Ciara > > Best regards, Ilya Maximets. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [ovs-dev,v4,3/5] netdev-dpdk: Add vHost User PMD
> > > I've added vHost maintainers to CC-list to hear their opinion about > > new API to get number of queues from the vHost PMD. > > Maybe we can expose 'rte_vhost_get_queue_num()' somehow or make > > 'dev_info->nb_rx_queues' usable? > > > I appreciate great investigation. > So far, I am not sure what is good way to get the value , but I agree it's > nice to > have such functionality. > Currently vhost library has such a function. > So, one of possible solution may prepare a function to convert portid to vid, > then use vid to call vhost library function directly. Hi, I submitted a patch that returns the vid for a given port_id as suggested above: http://dpdk.org/ml/archives/dev/2016-September/046631.html Any feedback would be much appreciated. Would hope to get this into DPDK 16.11 such that we can integrate the vHost PMD into OVS when it supports 16.11. Thanks, Ciara > Thanks, > Tetsuya > > NACK for now. > > > > Best regards, Ilya Maximets. > > > > On 29.07.2016 16:24, Ciara Loftus wrote: > > > DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' ports > > > to be controlled by the librte_ether API, like physical 'dpdk' ports and > > > IVSHM 'dpdkr' ports. This commit integrates this PMD into OVS and > > > removes direct calls to the librte_vhost DPDK library. > > > > > > This commit removes extended statistics support for vHost User ports > > > until such a time that this becomes available in the vHost PMD in a > > > DPDK release supported by OVS. > > > > > > Signed-off-by: Ciara Loftus > > > --- > > > INSTALL.DPDK.md | 10 + > > > NEWS | 2 + > > > lib/netdev-dpdk.c | 857 ++-- > -- > > > 3 files changed, 300 insertions(+), 569 deletions(-) > > > > > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > > > index 7609aa7..4feb7be 100644 > > > --- a/INSTALL.DPDK.md > > > +++ b/INSTALL.DPDK.md > > > @@ -604,6 +604,16 @@ can be found in [Vhost Walkthrough]. 
> > > > > > http://dpdk.org/doc/guides/rel_notes/release_16_04.html > > > > > > + - dpdk, dpdkr and dpdkvhostuser ports are 'eth' type ports in the > context of > > > + DPDK as they are all managed by the rte_ether API. This means that > they > > > + adhere to the DPDK configuration option > CONFIG_RTE_MAX_ETHPORTS which by > > > + default is set to 32. This means by default the combined total number > of > > > + dpdk, dpdkr and dpdkvhostuser ports allowable in OVS with DPDK is > 32. This > > > + value can be changed if desired by modifying the configuration file > > > in > > > + DPDK, or by overriding the default value on the command line when > building > > > + DPDK. eg. > > > + > > > + `make install CONFIG_RTE_MAX_ETHPORTS=64` > > > > > > Bug Reporting: > > > -- > > > diff --git a/NEWS b/NEWS > > > index dc3dedb..6510dde 100644 > > > --- a/NEWS > > > +++ b/NEWS > > > @@ -64,6 +64,8 @@ Post-v2.5.0 > > > * Basic connection tracking for the userspace datapath (no ALG, > > > fragmentation or NAT support yet) > > > * Remove dpdkvhostcuse port type. > > > + * vHost PMD integration brings vhost-user ports under control of the > > > + rte_ether DPDK API. > > > - Increase number of registers to 16. > > > - ovs-benchmark: This utility has been removed due to lack of use and > > > bitrot. 
> > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > > index d6959fe..d6ceeec 100644 > > > --- a/lib/netdev-dpdk.c > > > +++ b/lib/netdev-dpdk.c > > > @@ -30,7 +30,6 @@ > > > #include > > > #include > > > #include > > > -#include > > > > > > #include "dirs.h" > > > #include "dp-packet.h" > > > @@ -56,9 +55,9 @@ > > > #include "unixctl.h" > > > > > > #include "rte_config.h" > > > +#include "rte_eth_vhost.h" > > > #include "rte_mbuf.h" > > > #include "rte_meter.h" > > > -#include "rte_virtio_net.h" > > > > > > VLOG_DEFINE_THIS_MODULE(dpdk); > > > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > > > @@ -141,6 +140,9 @@ static char *vhost_sock_dir = NULL; /* Location > of vhost-user sockets */ > > > > > > #define VHOST_ENQ_RETRY_NUM 8 > > > > > > +/* Array that tracks the used & unused vHost user driver IDs */ > > > +static unsigned int vhost_drv_ids[RTE_MAX_ETHPORTS]; > > > + > > > static const struct rte_eth_conf port_conf = { > > > .rxmode = { > > > .mq_mode = ETH_MQ_RX_RSS, > > > @@ -346,12 +348,15 @@ struct netdev_dpdk { > > > struct rte_eth_link link; > > > int link_reset_cnt; > > > > > > - /* virtio-net structure for vhost device */ > > > - OVSRCU_TYPE(struct virtio_net *) virtio_dev; > > > + /* Number of virtqueue pairs reported by the guest */ > > > + uint32_t vhost_qp_nb; > > > > > > /* Identifier used to distinguish vhost devices from each other */ > > > char vhost_id[PATH_MAX]; > > > > > > + /* ID of vhost user port given to the PMD driver */ > > > + unsigned int vhost_pmd_id; > > > + > > > /
Re: [ovs-dev] [PATCH] netdev-dpdk: Provide explicit flag to rte_vhost_driver_register
> > ~RTE_VHOST_USER_CLIENT means that every other bit is set to one, which > affects other flags. > I think the current code is ok: it's not wrong to assume that the newly > initialized structure has every member set to 0. > If you feel strongly about it, I think it would be better to set the single > bit in > vhost_driver_flags. Ok - at the moment in DPDK if the last bit in the flags is zero the others aren't checked, but it is probably better to just toggle the last bit as you suggested. Anyway, if we can rely on the zero initialisation like you said, this patch isn't needed. Thanks, Ciara > Thanks, > Daniele > > 2016-08-18 5:29 GMT-07:00 Loftus, Ciara : > > > > rte_vhost_driver_register accepts a 'mode' flag which sets server(0) or > > client(1) mode. vHost devices are registered in 'server' mode (0) when > > initially created. Before this patch the flags provided to this > > function were the device's 'vhost_driver_flags' which hadn't been set > > yet when the register function is called but happen to be zero. Provide > > an explicit flag instead for better practice. > > > > Fixes: ("c1ff66ac80b5: netdev-dpdk: vHost client mode and reconnect") > > Apologies for mis-format. 
Should read: > Fixes: c1ff66ac80b5 ("netdev-dpdk: vHost client mode and reconnect") > > > Signed-off-by: Ciara Loftus > > --- > > lib/netdev-dpdk.c | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index e5f2cdd..32853ce 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -918,7 +918,7 @@ netdev_dpdk_vhost_construct(struct netdev > > *netdev) > > vhost_sock_dir, name); > > > > err = rte_vhost_driver_register(dev->vhost_server_id, > > - dev->vhost_driver_flags); > > + ~RTE_VHOST_USER_CLIENT); > > if (err) { > > VLOG_ERR("vhost-user socket device setup failure for socket %s\n", > > dev->vhost_server_id); > > -- > > 2.4.3 > > > > ___ > > dev mailing list > > dev@openvswitch.org > > http://openvswitch.org/mailman/listinfo/dev > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH v4 3/3] netdev-dpdk: vHost client mode and reconnect
> Hi Sean, > > 2016-08-15 9:04 GMT-07:00 Mooney, Sean K : > Hi Daniele > Sorry to top post but I have just read back over the last > couple of revisions of this patch. > > No problem, thanks for stepping in. As I said many times during the review > of this series I'm not sure what the best interface would be, and I really > appreciate any feedback on this. > > I noticed that you requested that the vhost-driver-mode flag be removed > From the Open_vSwitch table. The vhost-driver-mode flag was included in > The original patch for two reasons 1 to configure the global driver mode > > In my opinion Open vSwitch is not the right place to store this global > configuration parameter. I think we should put it as high in the stack as > possible. Isn't there another place to store it in OpenStack? > [Mooney, Sean K] OpenStack can make the choice instead of requiring ovs to > store this information as a global parameter as long as we can detect if the > feature is available. > > And 2 to provide a way to detect if reconnect/qemu server mode was > available. > > I see your point, we need feature detection for this. > Perhaps we can use another type for client ports, like > "dpdkvhostuserclient". I think it makes sense to have a separate class, since > the interface is different anyway. It would be easy then to detect the > feature based on the available iface_types. > [Mooney, Sean K] Yes if we keep the current dpdkvhostuser port type for > qemu:client dpdk:server mode > and introduce a "dpdkvhostuserclient" for the new qemu:server > dpdk:client mode of vhost-user that would work perfectly for my usecase. I submitted a patch that implements the suggested 'dpdkvhostuserclient' port type. http://openvswitch.org/pipermail/dev/2016-August/078199.html I think it's a good idea and a cleaner approach. Please add your 'Suggested-by' Daniele if you decide to apply the patch, I forgot it in the v1 - apologies. Will include it next time if there is another revision. 
Thanks, Ciara > it would be a trivial change in openstack and this should work equally well in > odl and ovn. > Could this be included in the 2.6 release? > What do you think? > Thanks, > Daniele > > > Without the global flag or a similar mechanism to expose the capability via > The ovsdb I cannot complete the OpenStack integrations. > https://review.openstack.org/#/c/344997/ > > I have one proposal which is to store the feature list currently in the faq > https://github.com/openvswitch/ovs/blob/master/FAQ.md#q-are-all- > features-available-with-all-datapaths > in the ovsdb. This can be retrieved remotely via the ovs db by openstack or > any other orchestrator to > make decisions based on the features detected. > > If you have another suggestion I would be glad to adapt my OpenStack > change > To use another mechanism to detect the support for reconnect but without > the > vhost-driver-mode flag I am currently blocked. > > I will make as a separate thread not to discuss feature discovery in general > Not to distract from this review > Regards > Sean. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] netdev-dpdk: Provide explicit flag to rte_vhost_driver_register
> > rte_vhost_driver_register accepts a 'mode' flag which sets server(0) or > client(1) mode. vHost devices are registered in 'server' mode (0) when > initially created. Before this patch the flags provided to this > function were the device's 'vhost_driver_flags' which hadn't been set > yet when the register function is called but happen to be zero. Provide > an explicit flag instead for better practice. > > Fixes: ("c1ff66ac80b5: netdev-dpdk: vHost client mode and reconnect") Apologies for mis-format. Should read: Fixes: c1ff66ac80b5 ("netdev-dpdk: vHost client mode and reconnect") > Signed-off-by: Ciara Loftus > --- > lib/netdev-dpdk.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index e5f2cdd..32853ce 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -918,7 +918,7 @@ netdev_dpdk_vhost_construct(struct netdev > *netdev) > vhost_sock_dir, name); > > err = rte_vhost_driver_register(dev->vhost_server_id, > -dev->vhost_driver_flags); > +~RTE_VHOST_USER_CLIENT); > if (err) { > VLOG_ERR("vhost-user socket device setup failure for socket %s\n", > dev->vhost_server_id); > -- > 2.4.3 > > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH RFC v3 1/1] netdev-dpdk: Arbitrary 'dpdk' port naming
> > 2016-07-19 2:53 GMT-07:00 Loftus, Ciara : > > > > > > The idea looks very good to me, thanks for working on it. > > > Very high level comments: > > Hi Daniele thanks for looking at this. > > > > > > > > Do we need to be limited to pci devices? Perhaps we can accept the > same > > > string as rte_eth_dev_attach(). > > Can you elaborate? For physical devs the string is always the PCI address. > Do > > you mean to include virtual devices as well? This could be an option once > we > > can use the ethdev API with vHost ports if the PMD gets merged. > > > > I agree with you that for vhost devices we can wait for vHost PMD. I was > > thinking more about devices like DPDK "af_packet" and "pcap". Can we > use > > this interface to create those as well? > > Understood. It’s possible. If the string provided isn't PCI format we can > assume it's a vdev and provide the args to attach() without searching through > the PCI devices and trying to find a match first. > I can include this in the v4. However I won't be able to thoroughly test all > 20+ > DPDK PMDs and ensure the attach() works for them all. I tested a few - some > worked out of the box eg. eth_null, some didn’t eg af_packet. > I imagine that the netdev_class dpdk_class functions only happen to be > compatible with some PMDs straight away. Those that aren't compatible will > require new port types (and modifications to existing / new netdev > functions) which I think is beyond the scope of this patch. Hi Daniele, I plan to submit a new version of this soon. Would like your opinion if possible on the way to support for other DPDK devices as you suggested previously. I think we should keep 'dpdk' ports limited to physical devices with associated PCI addresses. We could create a new port type ('dpdkvdev' maybe?) for the other devices like af_packet and eth_null. I would consider this port type as more 'experimental' (for a few reasons, mainly limited testing as mentioned above) and thus better to be kept separate. 
These ports could be configured like so: ovs-vsctl set Interface vdevX options:dpdk-devargs=eth_null0 'dpdk-devargs' would be supplied to rte_eth_dev_attach(). Thanks, Ciara > > Thanks, > Ciara > > > Thanks, > > Daniele > > > > > > > Would it be possible to integrate this more with the hotplug patch? It > > would > > > be nice to avoid introducing extra appctl commands and call > > > rte_eth_dev_attach() if needed in netdev_dpdk_construct(). > > Good idea. I'll look at this for the v4. > > > > Thanks, > > Ciara > > > > > Thoughts? > > > Thanks, > > > Daniele > > > > > > 2016-07-15 9:34 GMT-07:00 Ciara Loftus : > > > 'dpdk' ports no longer have naming restrictions. Now, instead > > > of specifying the dpdk port ID as part of the name, the PCI > > > address of the device must be specified via the 'dpdk-pci' > > > option. eg. > > > > > > ovs-vsctl add-port br0 my-port > > > ovs-vsctl set Interface my-port type=dpdk > > > ovs-vsctl set Interface my-port options:dpdk-pci=:06:00.3 > > > > > > Signed-off-by: Ciara Loftus > > > > > > v2: > > > - remove global pci list > > > - remove unnecessary parenthesis > > > - remove return from void fn > > > - print pci like dpdk > > > - fix port ranges > > > --- > > > INSTALL.DPDK-ADVANCED.md | 2 +- > > > INSTALL.DPDK.md | 10 ++-- > > > NEWS | 2 + > > > lib/netdev-dpdk.c | 132 > > > ++- > > > 4 files changed, 116 insertions(+), 30 deletions(-) > > > > > > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK- > ADVANCED.md > > > index 61b4e82..7370d03 100644 > > > --- a/INSTALL.DPDK-ADVANCED.md > > > +++ b/INSTALL.DPDK-ADVANCED.md > > > @@ -854,7 +854,7 @@ At this point, the user can create a ovs port using > > the > > > add-port command. 
> > > It is also possible to detach a port from ovs, the user has to remove the > > > port using the del-port command, then it can be detached using: > > > > > > -`ovs-appctl netdev-dpdk/port-detach dpdk0` > > > +`ovs-appctl netdev-dpdk/port-detach :01:00.0` > > > > > > This feature is not supported with VFIO and could not work with some > > NICs, > > > please refer to the [DPDK Port Hotplug Frame
Re: [ovs-dev] [PATCH v4 3/3] netdev-dpdk: vHost client mode and reconnect
> > Thanks for the patch, I tried it and it makes possible to restart vswitchd and > qemu. > I believe that now vhost_server_id and vhost_client_id are not constant for > the lifetime of the struct and must be protected with dev->mutex. > The following incremental on top of your patch does that and remove extra > parentheses from sizeof operator: > > - /* Identifiers used to distinguish vhost devices from each other. They do > - * not change during the lifetime of a struct netdev_dpdk. They can be > read > - * without holding any mutex. */ > - const char vhost_server_id[PATH_MAX]; > - const char vhost_client_id[PATH_MAX]; > + /* Identifiers used to distinguish vhost devices from each other. */ > + char vhost_server_id[PATH_MAX]; > + char vhost_client_id[PATH_MAX]; > > /* In dpdk_list. */ > struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex); > @@ -837,6 +835,7 @@ dpdk_dev_parse_name(const char dev_name[], > const char prefix[], > * use */ > static const char * > get_vhost_id(struct netdev_dpdk *dev) > + OVS_REQUIRES(dev->mutex) > { > return dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT ? > dev->vhost_client_id : dev->vhost_server_id; > @@ -867,20 +866,20 @@ netdev_dpdk_vhost_construct(struct netdev > *netdev) > /* Take the name of the vhost-user port and append it to the location > where > * the socket is to be created, then register the socket. 
> */ > - snprintf(CONST_CAST(char *, dev->vhost_server_id), > - sizeof(dev->vhost_server_id), "%s/%s", vhost_sock_dir, name); > + snprintf(dev->vhost_server_id, sizeof dev->vhost_server_id, "%s/%s", > + vhost_sock_dir, name); > > - err = rte_vhost_driver_register(get_vhost_id(dev), > + err = rte_vhost_driver_register(dev->vhost_server_id, > dev->vhost_driver_flags); > if (err) { > VLOG_ERR("vhost-user socket device setup failure for socket %s\n", > - get_vhost_id(dev)); > + dev->vhost_server_id); > } else { > if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) { > /* OVS server mode - add this socket to list for deletion */ > - fatal_signal_add_file_to_unlink(get_vhost_id(dev)); > + fatal_signal_add_file_to_unlink(dev->vhost_server_id); > VLOG_INFO("Socket %s created for vhost-user port %s\n", > - get_vhost_id(dev), name); > + dev->vhost_server_id, name); > } > err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST); > } > @@ -935,17 +934,19 @@ netdev_dpdk_destruct(struct netdev *netdev) > * try to acquire 'dpdk_mutex' and possibly 'dev->mutex'. To avoid a > * deadlock, none of the mutexes must be held while calling this function. 
*/ > static int > -dpdk_vhost_driver_unregister(struct netdev_dpdk *dev) > +dpdk_vhost_driver_unregister(struct netdev_dpdk *dev OVS_UNUSED, > + const char *vhost_id) > OVS_EXCLUDED(dpdk_mutex) > OVS_EXCLUDED(dev->mutex) > { > - return rte_vhost_driver_unregister(get_vhost_id(dev)); > + return rte_vhost_driver_unregister(vhost_id); > } > > static void > netdev_dpdk_vhost_destruct(struct netdev *netdev) > { > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > + char *vhost_id; > > ovs_mutex_lock(&dpdk_mutex); > ovs_mutex_lock(&dev->mutex); > @@ -967,15 +968,18 @@ netdev_dpdk_vhost_destruct(struct netdev > *netdev) > ovs_list_remove(&dev->list_node); > dpdk_mp_put(dev->dpdk_mp); > > + vhost_id = xstrdup(get_vhost_id(dev)); > + > ovs_mutex_unlock(&dev->mutex); > ovs_mutex_unlock(&dpdk_mutex); > > - if (dpdk_vhost_driver_unregister(dev)) { > - VLOG_ERR("Unable to remove vhost-user socket %s", > get_vhost_id(dev)); > + if (dpdk_vhost_driver_unregister(dev, vhost_id)) { > + VLOG_ERR("Unable to remove vhost-user socket %s", vhost_id); > } else if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) { > /* OVS server mode - remove this socket from list for deletion */ > - fatal_signal_remove_file_to_unlink(get_vhost_id(dev)); > + fatal_signal_remove_file_to_unlink(vhost_id); > } > + free(vhost_id); > } > > static void > @@ -2297,10 +2301,10 @@ new_device(int vid) > ovs_mutex_lock(&dpdk_mutex); > /* Add device to the vhost port with the same name as that passed down. > */ > LIST_FOR_EACH(dev, list_node, &dpdk_list) { > + ovs_mutex_lock(&dev->mutex); > if (strncmp(ifname, get_vhost_id(dev), IF_NAME_SZ) == 0) { > uint32_t qp_num = rte_vhost_get_queue_num(vid); > > - ovs_mutex_lock(&dev->mutex); > /* Get NUMA information */ > newnode = rte_vhost_get_numa_node(vid); > if (newnode == -1) { > @@ -2330,6 +2334,7 @@ new_device(int vid) > ovs_mutex_unlock(&dev->mutex)
Re: [ovs-dev] [PATCH] netdev-dpdk: Remove unnecessary 'if' statement
> > Hi Ciara, > Please find my comments below, > > > Regards > _Sugesh > > > > -Original Message- > > From: Loftus, Ciara > > Sent: Monday, August 15, 2016 10:01 AM > > To: Chandran, Sugesh ; > dev@openvswitch.org > > Subject: RE: [ovs-dev] [PATCH] netdev-dpdk: Remove unnecessary 'if' > > statement > > > > > > > > Hi Ciara, > > > Thank you for fixing this. > > > Changes are looks fine for me. > > > A minor comment as below. > > > Acked! > > > > > > > > > Regards > > > _Sugesh > > > > > > > > > > -Original Message- > > > > From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Ciara > > > Loftus > > > > Sent: Friday, August 12, 2016 5:17 PM > > > > To: dev@openvswitch.org > > > > Subject: [ovs-dev] [PATCH] netdev-dpdk: Remove unnecessary 'if' > > > > statement > > > > > > > > Only devices of type "DPDK_DEV_ETH" use the > netdev_dpdk_set_config > > > > function, so no need to check for the device type within the function. > > > > > > > > Fixes: 9fd39370c12c ("netdev-dpdk: Add Flow Control support.") > > > > Signed-off-by: Ciara Loftus > > > > --- > > > > lib/netdev-dpdk.c | 27 +-- > > > > 1 file changed, 13 insertions(+), 14 deletions(-) > > > > > > > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index > > > > 9a1f7cd..f772998 > > > > 100644 > > > > --- a/lib/netdev-dpdk.c > > > > +++ b/lib/netdev-dpdk.c > > > > @@ -1024,20 +1024,19 @@ netdev_dpdk_set_config(struct netdev > > > *netdev, > > > > const struct smap *args) > > > > } > > > > > > > > /* Flow control configuration for DPDK Ethernet ports. 
*/ > > > > -if (dev->type == DPDK_DEV_ETH) { > > > > -bool rx_fc_en = false; > > > > -bool tx_fc_en = false; > > > > -enum rte_eth_fc_mode fc_mode_set[2][2] = > > > > - {{RTE_FC_NONE, > > > > RTE_FC_TX_PAUSE}, > > > > -{RTE_FC_RX_PAUSE, > > > > RTE_FC_FULL} > > > > - }; > > > > -rx_fc_en = smap_get_bool(args, "rx-flow-ctrl", false); > > > > -tx_fc_en = smap_get_bool(args, "tx-flow-ctrl", false); > > > > -dev->fc_conf.autoneg = smap_get_bool(args, "flow-ctrl-autoneg", > > > > false); > > > > -dev->fc_conf.mode = fc_mode_set[tx_fc_en][rx_fc_en]; > > > > - > > > > -dpdk_eth_flow_ctrl_setup(dev); > > > > -} > > > [Sugesh] I would add a comment to say that the flow control is > > > supported only on Eth/physical ports for better readability. > > > > Thanks for the review Sugesh, I'll improve the comment for the v2. > > Out of interest, do 'dpdkr' ivshm ports support flow ctrl? At the moment > > (with and without this patch) we attempt to initialise flow ctrl for these > ports. > > If that's not intended behaviour I'll fix that in v2 as well. > [Sugesh] Good catch, the flow control can be enabled only on physical NIC > ports. > Ivshm ports doesn’t have flow control support. Can you please add that > check as well,? Thanks for clarifying. I've added this change in the v2. I didn't add your Ack as the patch has changed quite a bit since, but will add again if you're happy with the new patch. 
Thanks, Ciara > > Thanks, > Sugesh > > > > > Thanks, > > Ciara > > > > > > +bool rx_fc_en = false; > > > > +bool tx_fc_en = false; > > > > +enum rte_eth_fc_mode fc_mode_set[2][2] = > > > > + {{RTE_FC_NONE, RTE_FC_TX_PAUSE}, > > > > +{RTE_FC_RX_PAUSE, RTE_FC_FULL} > > > > + }; > > > > +rx_fc_en = smap_get_bool(args, "rx-flow-ctrl", false); > > > > +tx_fc_en = smap_get_bool(args, "tx-flow-ctrl", false); > > > > +dev->fc_conf.autoneg = smap_get_bool(args, "flow-ctrl-autoneg", > > > false); > > > > +dev->fc_conf.mode = fc_mode_set[tx_fc_en][rx_fc_en]; > > > > + > > > > +dpdk_eth_flow_ctrl_setup(dev); > > > > + > > > > ovs_mutex_unlock(&dev->mutex); > > > > > > > > return 0; > > > > -- > > > > 2.4.3 > > > > > > > > ___ > > > > dev mailing list > > > > dev@openvswitch.org > > > > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] netdev-dpdk: Remove unnecessary 'if' statement
> > Hi Ciara, > Thank you for fixing this. > Changes are looks fine for me. > A minor comment as below. > Acked! > > > Regards > _Sugesh > > > > -Original Message- > > From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Ciara > Loftus > > Sent: Friday, August 12, 2016 5:17 PM > > To: dev@openvswitch.org > > Subject: [ovs-dev] [PATCH] netdev-dpdk: Remove unnecessary 'if' > > statement > > > > Only devices of type "DPDK_DEV_ETH" use the netdev_dpdk_set_config > > function, so no need to check for the device type within the function. > > > > Fixes: 9fd39370c12c ("netdev-dpdk: Add Flow Control support.") > > Signed-off-by: Ciara Loftus > > --- > > lib/netdev-dpdk.c | 27 +-- > > 1 file changed, 13 insertions(+), 14 deletions(-) > > > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 9a1f7cd..f772998 > > 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -1024,20 +1024,19 @@ netdev_dpdk_set_config(struct netdev > *netdev, > > const struct smap *args) > > } > > > > /* Flow control configuration for DPDK Ethernet ports. */ > > -if (dev->type == DPDK_DEV_ETH) { > > -bool rx_fc_en = false; > > -bool tx_fc_en = false; > > -enum rte_eth_fc_mode fc_mode_set[2][2] = > > - {{RTE_FC_NONE, RTE_FC_TX_PAUSE}, > > -{RTE_FC_RX_PAUSE, RTE_FC_FULL} > > - }; > > -rx_fc_en = smap_get_bool(args, "rx-flow-ctrl", false); > > -tx_fc_en = smap_get_bool(args, "tx-flow-ctrl", false); > > -dev->fc_conf.autoneg = smap_get_bool(args, "flow-ctrl-autoneg", > > false); > > -dev->fc_conf.mode = fc_mode_set[tx_fc_en][rx_fc_en]; > > - > > -dpdk_eth_flow_ctrl_setup(dev); > > -} > [Sugesh] I would add a comment to say that the flow control is supported > only on > Eth/physical ports for better readability. Thanks for the review Sugesh, I'll improve the comment for the v2. Out of interest, do 'dpdkr' ivshm ports support flow ctrl? At the moment (with and without this patch) we attempt to initialise flow ctrl for these ports. 
If that's not intended behaviour I'll fix that in v2 as well. Thanks, Ciara > > +bool rx_fc_en = false; > > +bool tx_fc_en = false; > > +enum rte_eth_fc_mode fc_mode_set[2][2] = > > + {{RTE_FC_NONE, RTE_FC_TX_PAUSE}, > > +{RTE_FC_RX_PAUSE, RTE_FC_FULL} > > + }; > > +rx_fc_en = smap_get_bool(args, "rx-flow-ctrl", false); > > +tx_fc_en = smap_get_bool(args, "tx-flow-ctrl", false); > > +dev->fc_conf.autoneg = smap_get_bool(args, "flow-ctrl-autoneg", > false); > > +dev->fc_conf.mode = fc_mode_set[tx_fc_en][rx_fc_en]; > > + > > +dpdk_eth_flow_ctrl_setup(dev); > > + > > ovs_mutex_unlock(&dev->mutex); > > > > return 0; > > -- > > 2.4.3 > > > > ___ > > dev mailing list > > dev@openvswitch.org > > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH v2 3/3] netdev-dpdk: vHost client mode and reconnect
> > > 2016-08-08 7:18 GMT-07:00 Loftus, Ciara : > > > > The patch mostly looks good to me, thanks. > > I'm not 100% sure about the interface. Can we make the flag interface > > specific? > > I'm not 100% sure about making the flag interface specific :) Do you think > there's a use case for both client and server mode ports to be used in > conjunction with each other? > > Well, I don't have any specific use case in mind :-). I just think it's > cleaner > making it per interface for two reasons: > * I'd like to provide the user with the maximum flexibility that the API > allows. I don't like adding artificial limitations, especially in user > interfaces, > since those are supposed to be stable. > * The behavior of an interface depends on the status of the switch. It's like > having a global variable that influences the behavior of all the functions. > > > > If I'm not mistaken we currently limit vhost-sock-dir to be under OVS > > rundir. With client mode this is not necessary anymore. > > Correct I've fixed this in the next version. Thanks. > > > I hope that client will be made the default mode at some point, I think we > > should keep that in mind when considering the interface. > > I agree. I think we should wait until at least the QEMU v2.7.0 release though. > > > Since we're planning to break compatibility with the dpdk phy naming > > change, maybe we can break compatibility also with vhost ports and add a > > path option. > > Ok. So something like this? > > ovs-vsctl add-port vhost0 > ovs-vsctl set Interface vhost0 options:vhost-path=/tmp/v0.sock > > Maybe we can rely on the presence of this attribute to discern between > client and server ports (I would call it vhost-server-path). I've added this in the next version. Do you think the user should be able to switch between modes? For the first pass I've only allowed the change from server -> client but not back the other way. Let me know what you think. 
I also assumed that vhost-server-path should point to the directory in which the client socket (whose name is the name of the port) should be found. Let me know if this isn’t what you intended. Thanks, Ciara > > Maybe something for a separate standalone patch? > > If we're going with the per-interface configuration I think this should be > done > at the same time. > Thanks, > Daniele > > Thanks, > Ciara > > > > > Thoughts? > > Daniele > > > > 2016-08-04 7:09 GMT-07:00 Ciara Loftus : > > A new other_config DB option has been added called 'vhost-driver-mode'. > > By default this is set to 'server' which is the mode of operation OVS > > with DPDK has used up until this point - whereby OVS creates and manages > > vHost user sockets. > > > > If set to 'client', OVS will act as the vHost client and connect to > > sockets created and managed by QEMU which acts as the server. This > mode > > allows for reconnect capability, which allows vHost ports to resume > > normal connectivity in event of switch reset. > > > > QEMU v2.7.0+ is required when using OVS in client mode and QEMU in > > server mode. 
> > > > Signed-off-by: Ciara Loftus > > --- > > v2 > > - Updated comments in vhost construct & destruct > > - Add check for server-mode before printing error when destruct is called > > on a running VM > > - Fixed coding style/standards issues > > - Use strcmp instead of strncmp when processing 'vhost-driver-mode' > > > > INSTALL.DPDK-ADVANCED.md | 27 +++ > > NEWS | 1 + > > lib/netdev-dpdk.c | 31 +++ > > vswitchd/vswitch.xml | 13 + > > 4 files changed, 64 insertions(+), 8 deletions(-) > > > > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md > > index f9587b5..a773533 100755 > > --- a/INSTALL.DPDK-ADVANCED.md > > +++ b/INSTALL.DPDK-ADVANCED.md > > @@ -483,6 +483,33 @@ For users wanting to do packet forwarding using > > kernel stack below are the steps > > where `-L`: Changes the numbers of channels of the specified network > > device > > and `combined`: Changes the number of multi-purpose channels. > > > > + 4. Enable OVS vHost client-mode & vHost reconnect (OPTIONAL) > > + > > + By default, OVS DPDK acts as the vHost socket server and QEMU the > > + client. In QEMU v2.7 the option is available for QEMU to act as the > > + serv
Re: [ovs-dev] [PATCH] netdev-dpdk: add DPDK pdump capability
> > This is interesting, thanks for working on this. > The patch looks pretty simple, most of the magic happens in DPDK, so I will > comment only on the OvS side of things. > Why is 'other_config:dpdk-pdump' required? Can't we always enable the > feature? I tried running with it and I didn't notice any slowdown, unless > there's actually a listener (I'm not sure if this is true for every driver, > though). Having a feature switch that requires a restart makes it really hard > to debug production systems, which I'd say is one of the most interesting use > cases of such a facility. I had performed most of my performance regression testing with pre-16.07 release candidates where I noticed a larger performance drop. I re-tested with v16.07 and the performance decrease as you said is negligible. So I've removed the flag in the v2. We always init if it is detected in the DPDK build. On another note, the patch in general introduces a 3-4% performance decrease, even when DPDK_PDUMP=false ie. no extra code executed in netdev-dpdk which is curious. > Should we perhaps call rte_pdump_init() with a path in ovs_rundir()? I'm not > sure what's the best practice for DPDK apps in this regard I've included this in the v2. Thanks for the feedback, Ciara > Thanks, > Daniele > > 2016-08-04 3:47 GMT-07:00 Ciara Loftus : > This commit provides the ability to 'listen' on DPDK ports and save > packets to a pcap file with a DPDK app that uses the librte_pdump > library. One such app is the 'pdump' app that can be found in the DPDK > 'app' directory. Instructions on how to use this can be found in > INSTALL.DPDK-ADVANCED.md > > The pdump feature is optional. Should you wish to use it, pcap libraries > must to be installed on the system and the > CONFIG_RTE_LIBRTE_PMD_PCAP=y > > Extra 'to' > > and CONFIG_RTE_LIBRTE_PDUMP=y options set in DPDK. Additionally you > must > set the 'dpdk-pdump' ovs other_config DB value to 'true'. 
> > Signed-off-by: Ciara Loftus > --- > INSTALL.DPDK-ADVANCED.md | 30 -- > NEWS | 1 + > acinclude.m4 | 23 +++ > lib/netdev-dpdk.c | 19 +++ > vswitchd/vswitch.xml | 12 > 5 files changed, 83 insertions(+), 2 deletions(-) > > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md > index c8d69ae..877824b 100755 > --- a/INSTALL.DPDK-ADVANCED.md > +++ b/INSTALL.DPDK-ADVANCED.md > @@ -12,7 +12,8 @@ OVS DPDK ADVANCED INSTALL GUIDE > 7. [QOS](#qos) > 8. [Rate Limiting](#rl) > 9. [Flow Control](#fc) > -10. [Vsperf](#vsperf) > +10. [Pdump](#pdump) > +11. [Vsperf](#vsperf) > > ## 1. Overview > > @@ -862,7 +863,32 @@ respective parameter. To disable the flow control at > tx side, > > `ovs-vsctl set Interface dpdk0 options:tx-flow-ctrl=false` > > -## 10. Vsperf > +## 10. Pdump > + > +Pdump allows you to listen on DPDK ports and view the traffic that is > +passing on them. To use this utility, one must have libpcap installed > +on the system. Furthermore, DPDK must be built with > CONFIG_RTE_LIBRTE_PDUMP=y > +and CONFIG_RTE_LIBRTE_PMD_PCAP=y. And finally, the following > database > +value must be set before launching the switch, like so: > + > +`ovs-vsctl set Open_vSwitch . other_config:dpdk-pdump=true` > + > +To use pdump, simply launch OVS as usual. Then, navigate to the > 'app/pdump' > +directory in DPDK, 'make' the application and run like so: > + > +`sudo ./build/app/dpdk_pdump -- --pdump 'port=0,queue=0,rx- > dev=/tmp/rx.pcap'` > + > +The above command captures traffic received on queue 0 of port 0 and > stores > +it in /tmp/rx.pcap. Other combinations of port numbers, queues numbers > and > +pcap locations are of course also available to use. More information on the > +pdump app and its usage can be found in the below link. > + > +http://dpdk.org/doc/guides/sample_app_ug/pdump.html > + > +A performance decrease is expected when using a monitoring application > like > +the DPDK pdump app. > + > +## 11. 
Vsperf > > Vsperf project goal is to develop vSwitch test framework that can be used to > validate the suitability of different vSwitch implementations in a Telco > deployment > diff --git a/NEWS b/NEWS > index c2ed71d..3f40e23 100644 > --- a/NEWS > +++ b/NEWS > @@ -69,6 +69,7 @@ Post-v2.5.0 > * Basic connection tracking for the userspace datapath (no ALG, > fragmentation or NAT support yet) > * Support for DPDK 16.07 > + * Optional support for DPDK pdump enabled. > - Increase number of registers to 16. > - ovs-benchmark: This utility has been removed due to lack of use and > bitrot. > diff --git a/acinclude.m4 b/acinclude.m4 > index f02166d..b8f1850 100644 > --- a/acinclude.m4 > +++ b/acinclude.m4 > @@ -211,6 +211,29 @@ AC_DEFUN([OVS_CHECK_DPDK], [ > > AC_SEARCH_LIBS([get_mempolicy],[numa],[],[AC_MSG_ERROR([unable > to find libnuma, install the dependency package])]) > > + AC_COMPILE_IFELSE([ > +
Re: [ovs-dev] [PATCH v2 3/3] netdev-dpdk: vHost client mode and reconnect
> > The patch mostly looks good to me, thanks. > I'm not 100% sure about the interface. Can we make the flag interface > specific? I'm not 100% sure about making the flag interface specific :) Do you think there's a use case for both client and server mode ports to be used in conjunction with each other? > If I'm not mistaken we currently limit vhost-sock-dir to be under OVS > rundir. With client mode this is not necessary anymore. Correct I've fixed this in the next version. Thanks. > I hope that client will be made the default mode at some point, I think we > should keep that in mind when considering the interface. I agree. I think we should wait until at least the QEMU v2.7.0 release though. > Since we're planning to break compatibility with the dpdk phy naming > change, maybe we can break compatibility also with vhost ports and add a > path option. Ok. So something like this? ovs-vsctl add-port vhost0 ovs-vsctl set Interface vhost0 options:vhost-path=/tmp/v0.sock Maybe something for a separate standalone patch? Thanks, Ciara > > Thoughts? > Daniele > > 2016-08-04 7:09 GMT-07:00 Ciara Loftus : > A new other_config DB option has been added called 'vhost-driver-mode'. > By default this is set to 'server' which is the mode of operation OVS > with DPDK has used up until this point - whereby OVS creates and manages > vHost user sockets. > > If set to 'client', OVS will act as the vHost client and connect to > sockets created and managed by QEMU which acts as the server. This mode > allows for reconnect capability, which allows vHost ports to resume > normal connectivity in event of switch reset. > > QEMU v2.7.0+ is required when using OVS in client mode and QEMU in > server mode. 
> > Signed-off-by: Ciara Loftus > --- > v2 > - Updated comments in vhost construct & destruct > - Add check for server-mode before printing error when destruct is called > on a running VM > - Fixed coding style/standards issues > - Use strcmp instead of strncmp when processing 'vhost-driver-mode' > > INSTALL.DPDK-ADVANCED.md | 27 +++ > NEWS | 1 + > lib/netdev-dpdk.c | 31 +++ > vswitchd/vswitch.xml | 13 + > 4 files changed, 64 insertions(+), 8 deletions(-) > > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md > index f9587b5..a773533 100755 > --- a/INSTALL.DPDK-ADVANCED.md > +++ b/INSTALL.DPDK-ADVANCED.md > @@ -483,6 +483,33 @@ For users wanting to do packet forwarding using > kernel stack below are the steps > where `-L`: Changes the numbers of channels of the specified network > device > and `combined`: Changes the number of multi-purpose channels. > > + 4. Enable OVS vHost client-mode & vHost reconnect (OPTIONAL) > + > + By default, OVS DPDK acts as the vHost socket server and QEMU the > + client. In QEMU v2.7 the option is available for QEMU to act as the > + server. In order for this to work, OVS DPDK must be switched to > 'client' > + mode. This is possible by setting the 'vhost-driver-mode' DB entry to > + 'client' like so: > + > + ``` > + ovs-vsctl set Open_vSwitch . other_config:vhost-driver-mode="client" > + ``` > + > + This must be done before the switch is launched. It cannot sucessfully > + be changed after switch has launched. > + > + One must also append ',server' to the 'chardev' arguments on the > QEMU > + command line, to instruct QEMU to use vHost server mode, like so: > + > + > + -chardev > socket,id=char0,path=/usr/local/var/run/openvswitch/vhost0,server > + > + > + One benefit of using this mode is the ability for vHost ports to > + 'reconnect' in event of the switch crashing or being brought down. > Once > + it is brought back up, the vHost ports will reconnect automatically > and > + normal service will resume. 
> + > - VM Configuration with libvirt > > * change the user/group, access control policty and restart libvirtd. > diff --git a/NEWS b/NEWS > index 9f09e1c..99412ba 100644 > --- a/NEWS > +++ b/NEWS > @@ -70,6 +70,7 @@ Post-v2.5.0 > fragmentation or NAT support yet) > * Support for DPDK 16.07 > * Remove dpdkvhostcuse port type. > + * OVS client mode for vHost and vHost reconnect (Requires QEMU 2.7) > - Increase number of registers to 16. > - ovs-benchmark: This utility has been removed due to lack of use and > bitrot. > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 7692cc8..39c448b 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -136,7 +136,8 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / > ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF)) > #define OVS_VHOST_QUEUE_DISABLED (-2) /* Queue was disabled by > guest and not > * yet mapped to another queue. */ > > -static char *vhost_sock_dir = NULL; /* Location of vhost-u
Re: [ovs-dev] [PATCH] INSTALL.DPDK: Update documentation for DPDK 16.07 support
> > Replace 'dpdk_nic_bind.py' references with 'dpdk-devbind.py'. The script > name is changed in DPDK 16.07 as the script can be used also on crypto > devices along with NICs. > > Update the command for setting packet forwarding mode in 'testpmd' app > from 'set fwd mac_retry' to 'set fwd mac retry'. > > Signed-off-by: Bhanuprakash Bodireddy > > --- > INSTALL.DPDK.md | 16 > 1 file changed, 8 insertions(+), 8 deletions(-) > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > index 253d022..c0686ce 100644 > --- a/INSTALL.DPDK.md > +++ b/INSTALL.DPDK.md > @@ -153,8 +153,8 @@ advanced install guide [INSTALL.DPDK- > ADVANCED.md] > modprobe vfio-pci > sudo /usr/bin/chmod a+x /dev/vfio > sudo /usr/bin/chmod 0666 /dev/vfio/* > -$DPDK_DIR/tools/dpdk_nic_bind.py --bind=vfio-pci eth1 > -$DPDK_DIR/tools/dpdk_nic_bind.py --status > +$DPDK_DIR/tools/dpdk-devbind.py --bind=vfio-pci eth1 > +$DPDK_DIR/tools/dpdk-devbind.py --status > ``` > >Note: If running kernels < 3.6 UIO drivers to be used, > @@ -398,8 +398,8 @@ can be found in [Vhost Walkthrough]. >mount -t hugetlbfs hugetlbfs /dev/hugepages (only if not already > mounted) >modprobe uio >insmod $DPDK_BUILD/kmod/igb_uio.ko > - $DPDK_DIR/tools/dpdk_nic_bind.py --status > - $DPDK_DIR/tools/dpdk_nic_bind.py -b igb_uio 00:03.0 00:04.0 > + $DPDK_DIR/tools/dpdk-devbind.py --status > + $DPDK_DIR/tools/dpdk-devbind.py -b igb_uio 00:03.0 00:04.0 >``` > >vhost ports pci ids can be retrieved using `lspci | grep Ethernet` cmd. > @@ -570,18 +570,18 @@ can be found in [Vhost Walkthrough]. > ``` > cd $DPDK_DIR/app/test-pmd; > ./testpmd -c 0x3 -n 4 --socket-mem 1024 -- --burst=64 -i > --txqflags=0xf00 > --disable-hw-vlan > - set fwd mac_retry > + set fwd mac retry > start > ``` > > * Bind vNIC back to kernel once the test is completed. 
> > ``` > - $DPDK_DIR/tools/dpdk_nic_bind.py --bind=virtio-pci 0000:00:03.0 > - $DPDK_DIR/tools/dpdk_nic_bind.py --bind=virtio-pci 0000:00:04.0 > + $DPDK_DIR/tools/dpdk-devbind.py --bind=virtio-pci 0000:00:03.0 > + $DPDK_DIR/tools/dpdk-devbind.py --bind=virtio-pci 0000:00:04.0 > ``` > Note: Appropriate PCI IDs to be passed in above example. The PCI IDs > can be > - retrieved using '$DPDK_DIR/tools/dpdk_nic_bind.py --status' cmd. > + retrieved using '$DPDK_DIR/tools/dpdk-devbind.py --status' cmd. > > ### 5.3 PHY-VM-PHY [IVSHMEM] > > -- > 2.4.11 Acked-by: Ciara Loftus > > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [ovs-dev, 3/3] netdev-dpdk: vHost client mode and reconnect
> > Hi, Ciara. > I'm suggesting also following change: > - > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 57dc437..f092fa2 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -959,7 +963,8 @@ netdev_dpdk_vhost_destruct(struct netdev > *netdev) > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > > /* Guest becomes an orphan if still attached. */ > -if (netdev_dpdk_get_vid(dev) >= 0) { > +if (netdev_dpdk_get_vid(dev) >= 0 > +&& !(vhost_driver_flags & RTE_VHOST_USER_CLIENT)) { > VLOG_ERR("Removing port '%s' while vhost device still attached.", > netdev->name); > VLOG_ERR("To restore connectivity after re-adding of port, VM on > socket" > - > > Few comments inline. Thanks for the review Ilya. I've submitted a new version that incorporates your suggestions. Ciara > > On 04.08.2016 13:42, Ciara Loftus wrote: > > A new other_config DB option has been added called 'vhost-driver-mode'. > > By default this is set to 'server' which is the mode of operation OVS > > with DPDK has used up until this point - whereby OVS creates and manages > > vHost user sockets. > > > > If set to 'client', OVS will act as the vHost client and connect to > > sockets created and managed by QEMU which acts as the server. This > mode > > allows for reconnect capability, which allows vHost ports to resume > > normal connectivity in event of switch reset. > > > > QEMU v2.7.0+ is required when using OVS in client mode and QEMU in > > server mode. 
> > > > Signed-off-by: Ciara Loftus > > --- > > INSTALL.DPDK-ADVANCED.md | 27 +++ > > NEWS | 1 + > > lib/netdev-dpdk.c| 28 +--- > > vswitchd/vswitch.xml | 13 + > > 4 files changed, 62 insertions(+), 7 deletions(-) > > > > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md > > index f9587b5..a773533 100755 > > --- a/INSTALL.DPDK-ADVANCED.md > > +++ b/INSTALL.DPDK-ADVANCED.md > > @@ -483,6 +483,33 @@ For users wanting to do packet forwarding using > kernel stack below are the steps > > where `-L`: Changes the numbers of channels of the specified network > device > > and `combined`: Changes the number of multi-purpose channels. > > > > +4. Enable OVS vHost client-mode & vHost reconnect (OPTIONAL) > > + > > + By default, OVS DPDK acts as the vHost socket server and QEMU the > > + client. In QEMU v2.7 the option is available for QEMU to act as the > > + server. In order for this to work, OVS DPDK must be switched to > > 'client' > > + mode. This is possible by setting the 'vhost-driver-mode' DB entry > > to > > + 'client' like so: > > + > > + ``` > > + ovs-vsctl set Open_vSwitch . other_config:vhost-driver-mode="client" > > + ``` > > + > > + This must be done before the switch is launched. It cannot > > sucessfully > > + be changed after switch has launched. > > + > > + One must also append ',server' to the 'chardev' arguments on the > QEMU > > + command line, to instruct QEMU to use vHost server mode, like so: > > + > > + > > + -chardev > socket,id=char0,path=/usr/local/var/run/openvswitch/vhost0,server > > + > > + > > + One benefit of using this mode is the ability for vHost ports to > > + 'reconnect' in event of the switch crashing or being brought down. > Once > > + it is brought back up, the vHost ports will reconnect automatically > > and > > + normal service will resume. > > + > >- VM Configuration with libvirt > > > > * change the user/group, access control policty and restart libvirtd. 
> > diff --git a/NEWS b/NEWS > > index 9f09e1c..99412ba 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -70,6 +70,7 @@ Post-v2.5.0 > > fragmentation or NAT support yet) > > * Support for DPDK 16.07 > > * Remove dpdkvhostcuse port type. > > + * OVS client mode for vHost and vHost reconnect (Requires QEMU 2.7) > > - Increase number of registers to 16. > > - ovs-benchmark: This utility has been removed due to lack of use and > > bitrot. > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index 7692cc8..c528cb4 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -136,7 +136,8 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / > ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF)) > > #define OVS_VHOST_QUEUE_DISABLED(-2) /* Queue was disabled by > guest and not > >* yet mapped to another queue. */ > > > > -static char *vhost_sock_dir = NULL; /* Location of vhost-user sockets */ > > +static char *vhost_sock_dir = NULL; /* Location of vhost-user sockets > > */ > > +static uint64_t vhost_driver_flags = 0; /* Denote w
Re: [ovs-dev] [PATCH] netdev-dpdk: Avoid reconfiguration on reconnection of same vhost device.
> > Binding/unbinding of virtio driver inside VM leads to reconfiguration > of PMD threads. This behaviour may be abused by executing bind/unbind > in an infinite loop to break normal networking on all ports attached > to the same instance of Open vSwitch. > > Fix that by avoiding reconfiguration if it's not necessary. > Number of queues will not be decreased to 1 on device disconnection but > it's not very important in comparison with possible DOS attack from the > inside of guest OS. > > Fixes: 81acebdaaf27 ("netdev-dpdk: Obtain number of queues for vhost > ports from attached virtio.") > Reported-by: Ciara Loftus > Signed-off-by: Ilya Maximets > --- > lib/netdev-dpdk.c | 17 - > 1 file changed, 8 insertions(+), 9 deletions(-) > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index a0d541a..98369f1 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -2273,11 +2273,14 @@ new_device(struct virtio_net *virtio_dev) > newnode = dev->socket_id; > } > > -dev->requested_socket_id = newnode; > -dev->requested_n_rxq = qp_num; > -dev->requested_n_txq = qp_num; > -netdev_request_reconfigure(&dev->up); > - > +if (dev->requested_n_txq != qp_num > +|| dev->requested_n_rxq != qp_num > +|| dev->requested_socket_id != newnode) { > +dev->requested_socket_id = newnode; > +dev->requested_n_rxq = qp_num; > +dev->requested_n_txq = qp_num; > +netdev_request_reconfigure(&dev->up); > +} > ovsrcu_set(&dev->virtio_dev, virtio_dev); > exists = true; > > @@ -2333,11 +2336,7 @@ destroy_device(volatile struct virtio_net > *virtio_dev) > ovs_mutex_lock(&dev->mutex); > virtio_dev->flags &= ~VIRTIO_DEV_RUNNING; > ovsrcu_set(&dev->virtio_dev, NULL); > -/* Clear tx/rx queue settings. */ > netdev_dpdk_txq_map_clear(dev); > -dev->requested_n_rxq = NR_QUEUE; > -dev->requested_n_txq = NR_QUEUE; > -netdev_request_reconfigure(&dev->up); Hi Ilya, I assume we will still poll on N queues despite the device being down? Do you have any data showing how this may affect performance? 
Thanks, Ciara > > netdev_change_seq_changed(&dev->up); > ovs_mutex_unlock(&dev->mutex); > -- > 2.7.4 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] ovs-rcu: Add new ovsrcu_index type.
> > With RCU in Open vSwitch it's very easy to protect objects accessed by > a pointer, but sometimes a pointer is not available. > > One example is the vhost id for DPDK 16.07. Until DPDK 16.04 a pointer > was used to access a vhost device with RCU semantics. From DPDK 16.07 > an integer id (which is an array index) is used to access a vhost > device. Ideally, we want the exact same RCU semantics that we had for > the pointer, on the integer (atomicity, memory barriers, behaviour > around quiescent states) > > This commit implements a new type in ovs-rcu: ovsrcu_index. The newly > implemented ovsrcu_index_*() functions should be used to access the > type. > > Even though we say "Do not, in general, declare a typedef for a struct, > union, or enum.", I think we're not in the "general" case. > > Signed-off-by: Daniele Di Proietto > --- > lib/ovs-rcu.h | 84 > ++ > + > 1 file changed, 84 insertions(+) > > diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h > index dc75749..2887bb8 100644 > --- a/lib/ovs-rcu.h > +++ b/lib/ovs-rcu.h > @@ -125,6 +125,36 @@ > * ovs_mutex_unlock(&mutex); > * } > * > + * In some rare cases an object may not be addressable with a pointer, but > only > + * through an array index (e.g. because it's provided by another library). > It > + * is still possible to have RCU semantics by using the ovsrcu_index type. 
> + * > + * static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER; > + * > + * ovsrcu_index port_id; > + * > + * void tx() > + * { > + * int id = ovsrcu_index_get(&port_id); > + * if (id == -1) { > + * return; > + * } > + * port_tx(id); > + * } > + * > + * void delete() > + * { > + * int id; > + * > + * ovs_mutex_lock(&mutex); > + * id = ovsrcu_index_get_protected(&port_id); > + * ovsrcu_index_set(&port_id, -1); > + * ovs_mutex_unlock(&mutex); > + * > + * ovsrcu_synchronize(); > + * port_delete(id); > + * } > + * > */ > > #include "compiler.h" > @@ -213,6 +243,60 @@ void ovsrcu_postpone__(void (*function)(void > *aux), void *aux); > (void) sizeof(*(ARG)), \ > ovsrcu_postpone__((void (*)(void *))(FUNCTION), ARG)) > > +/* An array index protected by RCU semantics. This is an easier alternative > to > + * an RCU protected pointer to a malloc'd int. */ > +typedef struct { atomic_int v; } ovsrcu_index; > + > +static inline int ovsrcu_index_get__(const ovsrcu_index *i, memory_order > order) > +{ > +int ret; > +atomic_read_explicit(CONST_CAST(atomic_int *, &i->v), &ret, order); > +return ret; > +} > + > +/* Returns the index contained in 'i'. The returned value can be used until > + * the next grace period. */ > +static inline int ovsrcu_index_get(const ovsrcu_index *i) > +{ > +return ovsrcu_index_get__(i, memory_order_consume); > +} > + > +/* Returns the index contained in 'i'. This is an alternative to > + * ovsrcu_index_get() that can be used when there's no possible > concurrent > + * writer. */ > +static inline int ovsrcu_index_get_protected(const ovsrcu_index *i) > +{ > +return ovsrcu_index_get__(i, memory_order_relaxed); > +} > + > +static inline void ovsrcu_index_set__(ovsrcu_index *i, int value, > + memory_order order) > +{ > +atomic_store_explicit(&i->v, value, order); > +} > + > +/* Writes the index 'value' in 'i'. The previous value of 'i' may still be > + * used by readers until the next grace period. 
*/ > +static inline void ovsrcu_index_set(ovsrcu_index *i, int value) > +{ > +ovsrcu_index_set__(i, value, memory_order_release); > +} > + > +/* Writes the index 'value' in 'i'. This is an alternative to > + * ovsrcu_index_set() that can be used when there's no possible > concurrent > + * reader. */ > +static inline void ovsrcu_index_set_hidden(ovsrcu_index *i, int value) > +{ > +ovsrcu_index_set__(i, value, memory_order_relaxed); > +} > + > +/* Initializes 'i' with 'value'. This is safe to call as long as there are > no > + * concurrent readers. */ > +static inline void ovsrcu_index_init(ovsrcu_index *i, int value) > +{ > +atomic_init(&i->v, value); > +} > + > /* Quiescent states. */ > void ovsrcu_quiesce_start(void); > void ovsrcu_quiesce_end(void); > -- > 2.8.1 Tested-by: Ciara Loftus > > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH RFC v3 1/1] netdev-dpdk: Add support for DPDK 16.07
> > Given that using vhost PMD doesn't seem viable in the very short term, I > think we should stick with the vhost lib. > I sent a patch for ovsrcu to add a new RCU protected array index. > > http://openvswitch.org/pipermail/dev/2016-August/077097.html > Thanks, > Daniele Thanks Daniele, I submitted a new version of this patch that uses the vhost lib & the new RCU index: http://openvswitch.org/pipermail/dev/2016-August/077125.html Thanks, Ciara > > 2016-07-28 6:26 GMT-07:00 Loftus, Ciara : > > > > Thanks for the patch. > > I have another concern with this. If we're still going to rely on RCU to > protect > > the vhost device (and as pointed out by Ilya, I think we should) we need to > > use RCU-like semantics on the vid array index. I'm not sure a boolean flag > > is > > going to be enough. > > CCing Jarno: > > We have this int, which is an index into an array of vhost devices (the > > array > is > > inside the DPDK library). We want to make sure that when > > ovsrcu_synchronize() returns nobody is using the old index anymore. > > Should we introduce an RCU type for indexing into arrays? I found some > > negative opinions here: > > > > https://git.kernel.org/cgit/linux/kernel/git/next/linux- > > next.git/tree/Documentation/RCU/arrayRCU.txt?id=refs/tags/next- > > 20160722#n13 > > but I think using atomics should prevent the compiler from playing tricks > with > > the index. > > > > How about something like the code below? > > Thanks, > > Daniele > > I think the best way forward here is to avoid the RCU mechanisms by > merging the vHost PMD first as you have previously suggested. What do you > think? > If we don't go with that, I think we need to make a decision ASAP on how to > handle the RCU (ie. is below code needed?) as both DPDK and 2.6 releases > are imminent. 
> > Thanks, > Ciara > > > > > > > diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h > > index dc75749..d1a57f6 100644 > > --- a/lib/ovs-rcu.h > > +++ b/lib/ovs-rcu.h > > @@ -130,6 +130,41 @@ > > #include "compiler.h" > > #include "ovs-atomic.h" > > > > +typedef struct { atomic_int v; } ovsrcu_int; > > + > > +static inline int ovsrcu_int_get__(const ovsrcu_int *i, memory_order > order) > > +{ > > + int ret; > > + atomic_read_explicit(CONST_CAST(atomic_int *, &i->v), &ret, order); > > + return ret; > > +} > > + > > +static inline int ovsrcu_int_get(const ovsrcu_int *i) > > +{ > > + return ovsrcu_int_get__(i, memory_order_consume); > > +} > > + > > +static inline int ovsrcu_int_get_protected(const ovsrcu_int *i) > > +{ > > + return ovsrcu_int_get__(i, memory_order_relaxed); > > +} > > + > > +static inline void ovsrcu_int_set__(ovsrcu_int *i, int value, > > + memory_order order) > > +{ > > + atomic_store_explicit(&i->v, value, order); > > +} > > + > > +static inline void ovsrcu_int_set(ovsrcu_int *i, int value) > > +{ > > + ovsrcu_int_set__(i, value, memory_order_release); > > +} > > + > > +static inline void ovsrcu_int_set_protected(ovsrcu_int *i, int value) > > +{ > > + ovsrcu_int_set__(i, value, memory_order_relaxed); > > +} > > + > > #if __GNUC__ > > #define OVSRCU_TYPE(TYPE) struct { ATOMIC(TYPE) p; } > > #define OVSRCU_INITIALIZER(VALUE) { ATOMIC_VAR_INIT(VALUE) } > > > > > > 2016-07-22 8:55 GMT-07:00 Ciara Loftus : > > This commit introduces support for DPDK 16.07 and consequently breaks > > compatibility with DPDK 16.04. > > > > DPDK 16.07 introduces some changes to various APIs. 
These have been > > updated in OVS, including: > > * xstats API: changes to structure of xstats > > * vhost API: replace virtio-net references with 'vid' > > > > Signed-off-by: Ciara Loftus > > Tested-by: Maxime Coquelin > > > > v3: > > - fixed style issues > > - fixed & simplified xstats frees > > - use xcalloc & free instead of rte_mzalloc & rte_free for stats > > - remove libnuma include > > - fixed & simplified vHost NUMA set > > - added flag to indicate device reconfigured at least once > > - re-add call to rcu synchronise in destroy_device > > - define IF_NAME_SZ and use instead of PATH_MAX > > > > v2: > > - rebase with DPDK rc2 > > - rebase with OVS master > > - fix vhost cuse compilation > > --- > > .travis/linux-bui
Re: [ovs-dev] [ovs-dev,v4,3/5] netdev-dpdk: Add vHost User PMD
> > I've applied this patch and performed following test: > > OVS with 2 VMs connected via vhost-user ports. > Each vhost-user port has 4 queues. > > VM1 executes ping on LOCAL port. > In normal situation ping results are following: > > 100 packets transmitted, 100 received, 0% packet loss, time 99144ms > rtt min/avg/max/mdev = 0.231/0.459/0.888/0.111 ms > > After that VM2 starts execution of this script: > > while true; > do > ethtool -L eth0 combined 4; > ethtool -L eth0 combined 1; > done > > Now results of ping between VM1 and LOCAL port are: > > 100 packets transmitted, 100 received, 0% packet loss, time 99116ms > rtt min/avg/max/mdev = 5.466/150.327/356.201/85.208 ms > > Minimal time increased from 0.231 to 5.466 ms. > Average time increased from 0.459 to 150.327 ms (~300 times)! > > This happens because of constant reconfiguration requests from > the 'vring_state_changed_callback()'. > > As Ciara said, "Previously we could work with only reconfiguring during > link status change as we had full information available to us > ie. virtio_net->virt_qp_nb. We don't have that any more, so we need to > count the queues in OVS now every time we get a vring_change." > > Test above shows that this is unacceptable for OVS to perform > reconfiguration each time vring state changed because this leads to > ability for the guest user to break normal networking on all ports > connected to the same instance of Open vSwitch. Hi Ilya, Another thought on this. With the current master branch, isn't the above possible too with a script like this: while true; do echo "0000:00:03.0" > /sys/bus/pci/drivers/virtio-pci/bind echo "0000:00:03.0" > /sys/bus/pci/drivers/virtio-pci/unbind done The bind/unbind calls new/destroy device which in turn call reconfigure() each time.
Thanks, Ciara > > If this vulnerability is unavoidable with current version of vHost PMD, > I'm suggesting to postpone it's integration until there will be > method or special API to get number of queues from the inside of > 'link_status_changed_callback()'. > > I've added vHost maintainers to CC-list to hear their opinion about > new API to get number of queues from the vHost PMD. > Maybe we can expose 'rte_vhost_get_queue_num()' somehow or make > 'dev_info->nb_rx_queues' usable? > > NACK for now. > > Best regards, Ilya Maximets. > > On 29.07.2016 16:24, Ciara Loftus wrote: > > DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' ports > > to be controlled by the librte_ether API, like physical 'dpdk' ports and > > IVSHM 'dpdkr' ports. This commit integrates this PMD into OVS and > > removes direct calls to the librte_vhost DPDK library. > > > > This commit removes extended statistics support for vHost User ports > > until such a time that this becomes available in the vHost PMD in a > > DPDK release supported by OVS. > > > > Signed-off-by: Ciara Loftus > > --- > > INSTALL.DPDK.md | 10 + > > NEWS | 2 + > > lib/netdev-dpdk.c | 857 ++- > --- > > 3 files changed, 300 insertions(+), 569 deletions(-) > > > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > > index 7609aa7..4feb7be 100644 > > --- a/INSTALL.DPDK.md > > +++ b/INSTALL.DPDK.md > > @@ -604,6 +604,16 @@ can be found in [Vhost Walkthrough]. > > > > http://dpdk.org/doc/guides/rel_notes/release_16_04.html > > > > + - dpdk, dpdkr and dpdkvhostuser ports are 'eth' type ports in the context > of > > +DPDK as they are all managed by the rte_ether API. This means that > they > > +adhere to the DPDK configuration option CONFIG_RTE_MAX_ETHPORTS > which by > > +default is set to 32. This means by default the combined total number > > of > > +dpdk, dpdkr and dpdkvhostuser ports allowable in OVS with DPDK is 32. 
> This > > +value can be changed if desired by modifying the configuration file in > > +DPDK, or by overriding the default value on the command line when > building > > +DPDK. eg. > > + > > +`make install CONFIG_RTE_MAX_ETHPORTS=64` > > > > Bug Reporting: > > -- > > diff --git a/NEWS b/NEWS > > index dc3dedb..6510dde 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -64,6 +64,8 @@ Post-v2.5.0 > > * Basic connection tracking for the userspace datapath (no ALG, > > fragmentation or NAT support yet) > > * Remove dpdkvhostcuse port type. > > + * vHost PMD integration brings vhost-user ports under control of the > > + rte_ether DPDK API. > > - Increase number of registers to 16. > > - ovs-benchmark: This utility has been removed due to lack of use and > > bitrot. > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index d6959fe..d6ceeec 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -30,7 +30,6 @@ > > #include > > #include > > #include > > -#include > > > > #include "dirs.h" > > #include "dp-packet.
Re: [ovs-dev] [ovs-dev,v4,3/5] netdev-dpdk: Add vHost User PMD
> Subject: Re: [ovs-dev,v4,3/5] netdev-dpdk: Add vHost User PMD > > I've applied this patch and performed following test: > > OVS with 2 VMs connected via vhost-user ports. > Each vhost-user port has 4 queues. > > VM1 executes ping on LOCAL port. > In normal situation ping results are following: > > 100 packets transmitted, 100 received, 0% packet loss, time 99144ms > rtt min/avg/max/mdev = 0.231/0.459/0.888/0.111 ms > > After that VM2 starts execution of this script: > > while true; > do > ethtool -L eth0 combined 4; > ethtool -L eth0 combined 1; > done > > Now results of ping between VM1 and LOCAL port are: > > 100 packets transmitted, 100 received, 0% packet loss, time 99116ms > rtt min/avg/max/mdev = 5.466/150.327/356.201/85.208 ms > > Minimal time increased from 0.231 to 5.466 ms. > Average time increased from 0.459 to 150.327 ms (~300 times)! > > This happens because of constant reconfiguration requests from > the 'vring_state_changed_callback()'. Thanks for testing Ilya. I reproduced and saw the same behaviour. I'm brainstorming potential workarounds until we can get something like 'rte_vhost_get_queue_num()' in the PMD in a later DPDK release as you suggested. Before the reconfigure() framework, we used to use netdev_dpdk_vhost_set_queues(). Wondering if we can perhaps do something similar to this: diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 8ed3f83..255fb29 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -752,6 +752,24 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk *dev, unsigned int n_txqs) } } +static int +netdev_dpdk_vhost_set_queues(struct netdev_dpdk *dev) +OVS_REQUIRES(dev->mutex) +{ +dev->up.n_txq = dev->requested_n_txq; +dev->up.n_rxq = dev->requested_n_rxq; + +/* Enable TX queue 0 by default if it wasn't disabled. 
*/ +if (dev->tx_q[0].map == OVS_VHOST_QUEUE_MAP_UNKNOWN) { +dev->tx_q[0].map = 0; +} + +netdev_dpdk_remap_txqs(dev); + +return 0; +} + + void link_status_changed_callback(uint8_t port_id, enum rte_eth_event_type type OVS_UNUSED, @@ -822,7 +840,7 @@ vring_state_changed_callback(uint8_t port_id, } dev->requested_n_rxq = dev->vhost_qp_nb; dev->requested_n_txq = dev->vhost_qp_nb; -netdev_request_reconfigure(&dev->up); +netdev_dpdk_vhost_set_queues(dev); ovs_mutex_unlock(&dev->mutex); break; } @@ -2567,9 +2585,6 @@ netdev_dpdk_vhost_reconfigure(struct netdev *netdev) ovs_mutex_lock(&dpdk_mutex); ovs_mutex_lock(&dev->mutex); -netdev->n_txq = dev->requested_n_txq; -netdev->n_rxq = dev->requested_n_rxq; - /* Enable TX queue 0 by default if it wasn't disabled. */ if (dev->tx_q[0].map == OVS_VHOST_QUEUE_MAP_UNKNOWN) { dev->tx_q[0].map = 0; I tested the above and it removes the added ping latency. It's probably not good to modify the netdev outside of reconfigure though. I welcome other suggestions/input. Thanks, Ciara > > As Ciara said, "Previously we could work with only reconfiguring during > link status change as we had full information available to us > ie. virtio_net->virt_qp_nb. We don't have that any more, so we need to > count the queues in OVS now every time we get a vring_change." > > Test above shows that this is unacceptable for OVS to perform > reconfiguration each time vring state changed because this leads to > ability for the guest user to break normal networking on all ports > connected to the same instance of Open vSwitch. > > If this vulnerability is unavoidable with current version of vHost PMD, > I'm suggesting to postpone it's integration until there will be > method or special API to get number of queues from the inside of > 'link_status_changed_callback()'. > > I've added vHost maintainers to CC-list to hear their opinion about > new API to get number of queues from the vHost PMD. 
> Maybe we can expose 'rte_vhost_get_queue_num()' somehow or make > 'dev_info->nb_rx_queues' usable? > > NACK for now. > > Best regards, Ilya Maximets. > > On 29.07.2016 16:24, Ciara Loftus wrote: > > DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' ports > > to be controlled by the librte_ether API, like physical 'dpdk' ports and > > IVSHM 'dpdkr' ports. This commit integrates this PMD into OVS and > > removes direct calls to the librte_vhost DPDK library. > > > > This commit removes extended statistics support for vHost User ports > > until such a time that this becomes available in the vHost PMD in a > > DPDK release supported by OVS. > > > > Signed-off-by: Ciara Loftus > > --- > > INSTALL.DPDK.md | 10 + > > NEWS | 2 + > > lib/netdev-dpdk.c | 857 ++- > --- > > 3 files changed, 300 insertions(+), 569 deletions(-) > > > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > > in
Re: [ovs-dev] [ovs-dev,v3,3/5] netdev-dpdk: Add vHost User PMD
> > Not the complete review. Just few comments to design. Hi Ilya, Thanks for the feedback. > > And what about performance? Is there any difference in comparison to > current version of code? I guess, this may be slower than direct > access to vhost library. I had the same concern I've measured the difference and it's negligible. > > Comments inline. > > Best regards, Ilya Maximets. > > On 28.07.2016 19:21, Ciara Loftus wrote: > > DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' ports > > to be controlled by the librte_ether API, like physical 'dpdk' ports and > > IVSHM 'dpdkr' ports. This commit integrates this PMD into OVS and > > removes direct calls to the librte_vhost DPDK library. > > > > This commit removes extended statistics support for vHost User ports > > until such a time that this becomes available in the vHost PMD in a > > DPDK release supported by OVS. > > > > Signed-off-by: Ciara Loftus > > --- > > INSTALL.DPDK.md | 10 + > > NEWS | 2 + > > lib/netdev-dpdk.c | 857 ++- > --- > > 3 files changed, 300 insertions(+), 569 deletions(-) > > > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > > index 7609aa7..4feb7be 100644 > > --- a/INSTALL.DPDK.md > > +++ b/INSTALL.DPDK.md > > @@ -604,6 +604,16 @@ can be found in [Vhost Walkthrough]. > > > > http://dpdk.org/doc/guides/rel_notes/release_16_04.html > > > > + - dpdk, dpdkr and dpdkvhostuser ports are 'eth' type ports in the context > of > > +DPDK as they are all managed by the rte_ether API. This means that > they > > +adhere to the DPDK configuration option CONFIG_RTE_MAX_ETHPORTS > which by > > +default is set to 32. This means by default the combined total number > > of > > +dpdk, dpdkr and dpdkvhostuser ports allowable in OVS with DPDK is 32. > This > > +value can be changed if desired by modifying the configuration file in > > +DPDK, or by overriding the default value on the command line when > building > > +DPDK. eg. 
> > + > > +`make install CONFIG_RTE_MAX_ETHPORTS=64` > > > > Bug Reporting: > > -- > > diff --git a/NEWS b/NEWS > > index dc3dedb..6510dde 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -64,6 +64,8 @@ Post-v2.5.0 > > * Basic connection tracking for the userspace datapath (no ALG, > > fragmentation or NAT support yet) > > * Remove dpdkvhostcuse port type. > > + * vHost PMD integration brings vhost-user ports under control of the > > + rte_ether DPDK API. > > - Increase number of registers to 16. > > - ovs-benchmark: This utility has been removed due to lack of use and > > bitrot. > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index d6959fe..d6ceeec 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -30,7 +30,6 @@ > > #include > > #include > > #include > > -#include > > > > #include "dirs.h" > > #include "dp-packet.h" > > @@ -56,9 +55,9 @@ > > #include "unixctl.h" > > > > #include "rte_config.h" > > +#include "rte_eth_vhost.h" > > #include "rte_mbuf.h" > > #include "rte_meter.h" > > -#include "rte_virtio_net.h" > > > > VLOG_DEFINE_THIS_MODULE(dpdk); > > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > > @@ -141,6 +140,9 @@ static char *vhost_sock_dir = NULL; /* Location of > vhost-user sockets */ > > > > #define VHOST_ENQ_RETRY_NUM 8 > > > > +/* Array that tracks the used & unused vHost user driver IDs */ > > +static unsigned int vhost_drv_ids[RTE_MAX_ETHPORTS]; > > + > > static const struct rte_eth_conf port_conf = { > > .rxmode = { > > .mq_mode = ETH_MQ_RX_RSS, > > @@ -346,12 +348,15 @@ struct netdev_dpdk { > > struct rte_eth_link link; > > int link_reset_cnt; > > > > -/* virtio-net structure for vhost device */ > > -OVSRCU_TYPE(struct virtio_net *) virtio_dev; > > +/* Number of virtqueue pairs reported by the guest */ > > +uint32_t vhost_qp_nb; > > > > /* Identifier used to distinguish vhost devices from each other */ > > char vhost_id[PATH_MAX]; > > > > +/* ID of vhost user port given to the PMD driver */ > > +unsigned 
int vhost_pmd_id; > > + > > /* In dpdk_list. */ > > struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex); > > > > @@ -382,16 +387,23 @@ struct netdev_rxq_dpdk { > > static bool dpdk_thread_is_pmd(void); > > > > static int netdev_dpdk_construct(struct netdev *); > > - > > -struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk > *dev); > > +static int netdev_dpdk_vhost_construct(struct netdev *); > > > > struct ingress_policer * > > netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev); > > > > +static void link_status_changed_callback(uint8_t port_id, > > +enum rte_eth_event_type type, void *param); > > +static void vring_state_changed_callback(uint8_t port_id, > > +enum rte_eth_event_type type, void *param); > > +static void netdev_dpdk_remap_txqs(
Re: [ovs-dev] [PATCH RFC 1/1] netdev-dpdk: add DPDK pdump capability
> > This looks like a nice useablity feature to add. > Two questions inline > Regards sean. Thanks Sean, replies inline. Ciara > > > -Original Message- > > From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Ciara > Loftus > > Sent: Friday, July 29, 2016 10:58 AM > > To: dev@openvswitch.org > > Subject: [ovs-dev] [PATCH RFC 1/1] netdev-dpdk: add DPDK pdump > capability > > > > This commit provides the ability to 'listen' on DPDK ports and save packets > to a > > pcap file with a DPDK app that uses the librte_pdump library. One such app > is the > > 'pdump' app that can be found in the DPDK 'app' directory. Instructions on > how to > > use this can be found in INSTALL.DPDK-ADVANCED.md > > > > The pdump feature is optional. Should you wish to use it, pcap libraries > must to > > be installed on the system and the CONFIG_RTE_LIBRTE_PMD_PCAP=y > and > > CONFIG_RTE_LIBRTE_PDUMP=y options set in DPDK. Additionally you must > set > > the 'dpdk-pdump' ovs other_config DB value to 'true'. > > > > Signed-off-by: Ciara Loftus > > --- > > INSTALL.DPDK-ADVANCED.md | 27 +-- > > NEWS | 1 + > > acinclude.m4 | 23 +++ > > lib/netdev-dpdk.c| 19 +++ > > vswitchd/vswitch.xml | 12 > > 5 files changed, 80 insertions(+), 2 deletions(-) > > > > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md > index > > ec1de29..0ffafa3 100644 > > --- a/INSTALL.DPDK-ADVANCED.md > > +++ b/INSTALL.DPDK-ADVANCED.md > > @@ -11,7 +11,8 @@ OVS DPDK ADVANCED INSTALL GUIDE 6. [Vhost > > Walkthrough](#vhost) 7. [QOS](#qos) 8. [Rate Limiting](#rl) -9. > > [Vsperf](#vsperf) > > +9. [Pdump](#pdump) > > +10. [Vsperf](#vsperf) > > > > ## 1. Overview > > > > @@ -827,7 +828,29 @@ To clear the ingress policer configuration from the > port > > use the following: > > > > For more details regarding ingress-policer see the vswitch.xml. > > > > -## 9. Vsperf > > +## 9. Pdump > > + > > +Pdump allows you to listen on DPDK ports and view the traffic that is > > +passing on them. 
To use this utility, one must have libpcap installed > > +on the system. Furthermore, DPDK must be built with > > +CONFIG_RTE_LIBRTE_PDUMP=y and > CONFIG_RTE_LIBRTE_PMD_PCAP=y. And > > +finally, the following database value must be set before launching the > switch, > > like so: > > + > > +`ovs-vsctl set Open_vSwitch . other_config:dpdk-pdump=true` > > + > > +To use pdump, simply launch OVS as usual. Then, navigate to the > 'app/pdump' > > +directory in DPDK, 'make' the application and run like so: > > + > > +`sudo ./build/app/dpdk_pdump -- --pdump > > +'port=0,queue=0,rx-dev=/tmp/rx.pcap'` > [Mooney, Sean K] can the dpdk_pdump utility dump non dpdk physical ports > such as > Vhost-user ports or dump all queues on a port at the same time? The user guide for the pdump sample app can be found here which has more info: http://dpdk.org/browse/dpdk/tree/doc/guides/sample_app_ug/pdump.rst?h=releases I might add a reference to this in the docs. Re: non-physical ports: it appears not due to a bug in the eth_dev library (see doc above). Re: all queues at the same time: it appears you can, by specifying queue_id=*. > Am I correct in saying that Port=0 in this case indicate the first index in > the > dpdk port > list which is dpdk0 from an ovs perspective or is this the port id as shown in > ovs-appctl dpctl/show? It is the DPDK port_id as the pdump app has no awareness of OVS other than that it is a DPDK primary process. So the port_id is the dpdk port_id ie. =0 for dpdk0 > > + > > +The above command captures traffic received on queue 0 of port 0 and > > +stores it in /tmp/rx.pcap. Other combinations of port numbers, queues > > +numbers and pcap locations are of course also available to use. > > + > > +A small performance decrease is seen when dpdk-pdump=true. This > > +decrease is larger when using a monitoring application like the DPDK > pdump > > app. > > + > > +## 10. 
Vsperf > > > > Vsperf project goal is to develop vSwitch test framework that can be used > to > > validate the suitability of different vSwitch implementations in a Telco > > deployment diff --git a/NEWS b/NEWS index 32975b0..f59b3b0 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -64,6 +64,7 @@ Post-v2.5.0 > > * Basic connection tracking for the userspace datapath (no ALG, > > fragmentation or NAT support yet) > > * Support for DPDK 16.07 > > + * Optional support for DPDK pdump enabled. > > - Increase number of registers to 16. > > - ovs-benchmark: This utility has been removed due to lack of use and > > bitrot. > > diff --git a/acinclude.m4 b/acinclude.m4 index faf79eb..0c1dafd 100644 > > --- a/acinclude.m4 > > +++ b/acinclude.m4 > > @@ -211,6 +211,29 @@ AC_DEFUN([OVS_CHECK_DPDK], [ > > > > AC_SEARCH_LIBS([get_mempolicy],[numa],[],[AC_MSG_ERROR([unable > to > > find libnuma, install the dependency package])]) > > > > +
Re: [ovs-dev] [PATCH RFC v3 1/1] netdev-dpdk: Add support for DPDK 16.07
> > Thanks for the patch. > I have another concern with this. If we're still going to rely on RCU to > protect > the vhost device (and as pointed out by Ilya, I think we should) we need to > use RCU-like semantics on the vid array index. I'm not sure a boolean flag is > going to be enough. > CCing Jarno: > We have this int, which is an index into an array of vhost devices (the array > is > inside the DPDK library). We want to make sure that when > ovsrcu_synchronize() returns nobody is using the old index anymore. > Should we introduce an RCU type for indexing into arrays? I found some > negative opinions here: > > https://git.kernel.org/cgit/linux/kernel/git/next/linux- > next.git/tree/Documentation/RCU/arrayRCU.txt?id=refs/tags/next- > 20160722#n13 > but I think using atomics should prevent the compiler from playing tricks with > the index. > > How about something like the code below? > Thanks, > Daniele I think the best way forward here is to avoid the RCU mechanisms by merging the vHost PMD first as you have previously suggested. What do you think? If we don't go with that, I think we need to make a decision ASAP on how to handle the RCU (i.e. is the code below needed?) as both the DPDK and OVS 2.6 releases are imminent.
Thanks, Ciara > > > diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h > index dc75749..d1a57f6 100644 > --- a/lib/ovs-rcu.h > +++ b/lib/ovs-rcu.h > @@ -130,6 +130,41 @@ > #include "compiler.h" > #include "ovs-atomic.h" > > +typedef struct { atomic_int v; } ovsrcu_int; > + > +static inline int ovsrcu_int_get__(const ovsrcu_int *i, memory_order order) > +{ > + int ret; > + atomic_read_explicit(CONST_CAST(atomic_int *, &i->v), &ret, order); > + return ret; > +} > + > +static inline int ovsrcu_int_get(const ovsrcu_int *i) > +{ > + return ovsrcu_int_get__(i, memory_order_consume); > +} > + > +static inline int ovsrcu_int_get_protected(const ovsrcu_int *i) > +{ > + return ovsrcu_int_get__(i, memory_order_relaxed); > +} > + > +static inline void ovsrcu_int_set__(ovsrcu_int *i, int value, > + memory_order order) > +{ > + atomic_store_explicit(&i->v, value, order); > +} > + > +static inline void ovsrcu_int_set(ovsrcu_int *i, int value) > +{ > + ovsrcu_int_set__(i, value, memory_order_release); > +} > + > +static inline void ovsrcu_int_set_protected(ovsrcu_int *i, int value) > +{ > + ovsrcu_int_set__(i, value, memory_order_relaxed); > +} > + > #if __GNUC__ > #define OVSRCU_TYPE(TYPE) struct { ATOMIC(TYPE) p; } > #define OVSRCU_INITIALIZER(VALUE) { ATOMIC_VAR_INIT(VALUE) } > > > 2016-07-22 8:55 GMT-07:00 Ciara Loftus : > This commit introduces support for DPDK 16.07 and consequently breaks > compatibility with DPDK 16.04. > > DPDK 16.07 introduces some changes to various APIs. 
These have been > updated in OVS, including: > * xstats API: changes to structure of xstats > * vhost API: replace virtio-net references with 'vid' > > Signed-off-by: Ciara Loftus > Tested-by: Maxime Coquelin > > v3: > - fixed style issues > - fixed & simplified xstats frees > - use xcalloc & free instead of rte_mzalloc & rte_free for stats > - remove libnuma include > - fixed & simplified vHost NUMA set > - added flag to indicate device reconfigured at least once > - re-add call to rcu synchronise in destroy_device > - define IF_NAME_SZ and use instead of PATH_MAX > > v2: > - rebase with DPDK rc2 > - rebase with OVS master > - fix vhost cuse compilation > --- > .travis/linux-build.sh | 2 +- > INSTALL.DPDK-ADVANCED.md | 8 +- > INSTALL.DPDK.md | 20 ++--- > NEWS | 1 + > lib/netdev-dpdk.c | 220 +++--- > - > 5 files changed, 126 insertions(+), 125 deletions(-) > > diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh > index 065de39..1b3d43d 100755 > --- a/.travis/linux-build.sh > +++ b/.travis/linux-build.sh > @@ -68,7 +68,7 @@ fi > > if [ "$DPDK" ]; then > if [ -z "$DPDK_VER" ]; then > - DPDK_VER="16.04" > + DPDK_VER="16.07" > fi > install_dpdk $DPDK_VER > if [ "$CC" = "clang" ]; then > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md > index 9ae536d..ec1de29 100644 > --- a/INSTALL.DPDK-ADVANCED.md > +++ b/INSTALL.DPDK-ADVANCED.md > @@ -43,7 +43,7 @@ for DPDK and OVS. 
> For IVSHMEM case, set `export DPDK_TARGET=x86_64-ivshmem- > linuxapp-gcc` > > ``` > - export DPDK_DIR=/usr/src/dpdk-16.04 > + export DPDK_DIR=/usr/src/dpdk-16.07 > export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET > make install T=$DPDK_TARGET DESTDIR=install > ``` > @@ -339,7 +339,7 @@ For users wanting to do packet forwarding using > kernel stack below are the steps > cd /usr/src/cmdline_generator > wget https://raw.githubusercontent.com/netgroup-polito/un- > orchestrator/master/orchestrator/compute_controller/plugins/kvm- > libvirt/cmdline_generator/cmdline_generator.c > wget https://raw.githu
Re: [ovs-dev] [PATCH RFC v3 1/1] netdev-dpdk: Arbitrary 'dpdk' port naming
> 2016-07-19 2:53 GMT-07:00 Loftus, Ciara : > > > > The idea looks very good to me, thanks for working on it. > > Very high level comments: > Hi Daniele thanks for looking at this. > > > > > Do we need to be limited to pci devices? Perhaps we can accept the same > > string as rte_eth_dev_attach(). > Can you elaborate? For physical devs the string is always the PCI address. Do > you mean to include virtual devices as well? This could be an option once we > can use the ethdev API with vHost ports if the PMD gets merged. > > I agree with you that for vhost devices we can wait for vHost PMD. I was > thinking more about devices like DPDK "af_packet" and "pcap". Can we use > this interface to create those as well? Understood. It’s possible. If the string provided isn't PCI format we can assume it's a vdev and provide the args to attach() without searching through the PCI devices and trying to find a match first. I can include this in the v4. However I won't be able to thoroughly test all 20+ DPDK PMDs and ensure the attach() works for them all. I tested a few - some worked out of the box eg. eth_null, some didn’t eg af_packet. I imagine that the netdev_class dpdk_class functions only happen to be compatible with some PMDs straight away. Those that aren't compatible will require new port types (and modifications to existing / new netdev functions) which I think is beyond the scope of this patch. Thanks, Ciara > Thanks, > Daniele > > > > Would it be possible to integrate this more with the hotplug patch? It > would > > be nice to avoid introducing extra appctl commands and call > > rte_eth_dev_attach() if needed in netdev_dpdk_construct(). > Good idea. I'll look at this for the v4. > > Thanks, > Ciara > > > Thoughts? > > Thanks, > > Daniele > > > > 2016-07-15 9:34 GMT-07:00 Ciara Loftus : > > 'dpdk' ports no longer have naming restrictions. 
Now, instead > > of specifying the dpdk port ID as part of the name, the PCI > > address of the device must be specified via the 'dpdk-pci' > > option. eg. > > > > ovs-vsctl add-port br0 my-port > > ovs-vsctl set Interface my-port type=dpdk > > ovs-vsctl set Interface my-port options:dpdk-pci=:06:00.3 > > > > Signed-off-by: Ciara Loftus > > > > v2: > > - remove global pci list > > - remove unnecessary parenthesis > > - remove return from void fn > > - print pci like dpdk > > - fix port ranges > > --- > > INSTALL.DPDK-ADVANCED.md | 2 +- > > INSTALL.DPDK.md | 10 ++-- > > NEWS | 2 + > > lib/netdev-dpdk.c | 132 > > ++- > > 4 files changed, 116 insertions(+), 30 deletions(-) > > > > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md > > index 61b4e82..7370d03 100644 > > --- a/INSTALL.DPDK-ADVANCED.md > > +++ b/INSTALL.DPDK-ADVANCED.md > > @@ -854,7 +854,7 @@ At this point, the user can create a ovs port using > the > > add-port command. > > It is also possible to detach a port from ovs, the user has to remove the > > port using the del-port command, then it can be detached using: > > > > -`ovs-appctl netdev-dpdk/port-detach dpdk0` > > +`ovs-appctl netdev-dpdk/port-detach :01:00.0` > > > > This feature is not supported with VFIO and could not work with some > NICs, > > please refer to the [DPDK Port Hotplug Framework] in order to get more > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > > index 5407794..9a781ff 100644 > > --- a/INSTALL.DPDK.md > > +++ b/INSTALL.DPDK.md > > @@ -258,13 +258,13 @@ advanced install guide [INSTALL.DPDK- > > ADVANCED.md] > > > > `ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev` > > > > - Now you can add DPDK devices. OVS expects DPDK device names to > start > > with > > - "dpdk" and end with a portid. vswitchd should print (in the log file) > > the > > - number of dpdk devices found. > > + Now you can add dpdk devices. The PCI address of the device needs to > > be > > + set using the 'dpdk-pci' option. 
vswitchd should print (in the log > > file) > > + the PCI addresses of dpdk devices found during initialisation. > > > > ``` > > - ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk > > - ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk > > + ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk > > options:dpdk-pc
Re: [ovs-dev] [PATCH 3/4] netdev-dpdk: Add vHost User PMD
> Subject: Re: [ovs-dev] [PATCH 3/4] netdev-dpdk: Add vHost User PMD > > Thanks for the patch > This needs a little bit of rebasing, I did it myself to review, but it'd be > nice to > have an updated version. I've submitted a new set here: http://openvswitch.org/pipermail/dev/2016-July/076245.html > I like the simplification that this brings especially to the fast path. > If we merge this before we merge the DPDK 16.07 we won't have to deal > with the vid change. Sounds good. Thanks for the review. Replies inline. Thanks, Ciara > Thanks, > Daniele > > 2016-07-15 7:26 GMT-07:00 Ciara Loftus : > DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' ports > to be controlled by the librte_ether API, like physical 'dpdk' ports and > IVSHM 'dpdkr' ports. This commit integrates this PMD into OVS and > removes direct calls to the librte_vhost DPDK library. > > This commit removes extended statistics support for vHost User ports > until such a time that this becomes available in the vHost PMD in a > DPDK release supported by OVS. > > Signed-off-by: Ciara Loftus > --- > INSTALL.DPDK.md | 10 + > NEWS | 2 + > lib/netdev-dpdk.c | 856 ++--- > - > 3 files changed, 302 insertions(+), 566 deletions(-) > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > index 5407794..29b6f91 100644 > --- a/INSTALL.DPDK.md > +++ b/INSTALL.DPDK.md > @@ -561,6 +561,16 @@ can be found in [Vhost Walkthrough]. > > http://dpdk.org/doc/guides/rel_notes/release_16_04.html > > + - dpdk, dpdkr and dpdkvhostuser ports are 'eth' type ports in the context > of > + DPDK as they are all managed by the rte_ether API. This means that they > + adhere to the DPDK configuration option CONFIG_RTE_MAX_ETHPORTS > which by > + default is set to 32. This means by default the combined total number of > + dpdk, dpdkr and dpdkvhostuser ports allowable in OVS with DPDK is 32. 
> This > + value can be changed if desired by modifying the configuration file in > + DPDK, or by overriding the default value on the command line when > building > + DPDK. eg. > + > + `make install CONFIG_RTE_MAX_ETHPORTS=64` > > Again, I hope this doesn't cause problems to a lot of users. I'd like to see > the > limit increased by default, but I think we can merge this patch as it is. Agreed. > > > Bug Reporting: > -- > diff --git a/NEWS b/NEWS > index aa1b915..b3791ed 100644 > --- a/NEWS > +++ b/NEWS > @@ -59,6 +59,8 @@ Post-v2.5.0 > node that device memory is located on if > CONFIG_RTE_LIBRTE_VHOST_NUMA > is enabled in DPDK. > * Remove dpdkvhostcuse port type. > + * vHost PMD integration brings vhost-user ports under control of the > + rte_ether DPDK API. > - Increase number of registers to 16. > - ovs-benchmark: This utility has been removed due to lack of use and > bitrot. > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index b4f82af..5de806a 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -56,6 +56,7 @@ > #include "unixctl.h" > > #include "rte_config.h" > +#include "rte_eth_vhost.h" > #include "rte_mbuf.h" > #include "rte_meter.h" > #include "rte_virtio_net.h" > @@ -141,6 +142,11 @@ static char *vhost_sock_dir = NULL; /* Location of > vhost-user sockets */ > > #define VHOST_ENQ_RETRY_NUM 8 > > +/* Array that tracks the used & unused vHost user driver IDs */ > +static unsigned int vhost_drv_ids[RTE_MAX_ETHPORTS]; > +/* Maximum string length allowed to provide to rte_eth_attach function */ > +#define DEVARGS_MAX (RTE_ETH_NAME_MAX_LEN + PATH_MAX + 18) > + > > I think this is not needed if we use xasprintf() below. Removed in v2 > > static const struct rte_eth_conf port_conf = { > .rxmode = { > .mq_mode = ETH_MQ_RX_RSS, > @@ -353,12 +359,15 @@ struct netdev_dpdk { > * always true. 
*/ > bool txq_needs_locking; > > - /* virtio-net structure for vhost device */ > - OVSRCU_TYPE(struct virtio_net *) virtio_dev; > + /* Number of virtqueue pairs reported by the guest */ > + uint32_t vhost_qp_nb; > > /* Identifier used to distinguish vhost devices from each other */ > char vhost_id[PATH_MAX]; > > + /* ID of vhost user port given to the PMD driver */ > + unsigned int vhost_pmd_id; > + > /* In dpdk_list. */ > struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex); > > @@ -389,16 +398,25 @@ struct netdev_rxq_dpdk { > static bool dpdk_thread_is_pmd(void); > > static int netdev_dpdk_construct(struct netdev *); > +static int netdev_dpdk_vhost_construct(struct netdev *); > > struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk *dev); > > struct ingress_policer * > netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev); > > +void link_status_changed_callback(uint8_t port_id, > + enum rte_eth_event_type type OVS_UNUSED, void *param > OVS_UNUSED); > +void vring_state_changed_ca
Re: [ovs-dev] Backport Request: dpif-netdev: Remove PMD latency on seq_mutex
> > Thanks Flavio for checking and Daniel for your consideration. > Indeed the issue exists in 2.5 branch. > > We are treating this more in the bucket of a performance bug fix than a > feature. > > Any specific testing that you would like to see run to help reduce > your concern related to changes to the core modules ? > > Ciara, what's your opinion on these changes for a backport ? I'm of the same opinion as yourself and Flavio that this is more a fix than a feature. I'd like to see it backported. But I understand there may be some risk associated with it due to the nature of the changes. Thanks, Ciara > > Thanks > Vinod > > > > -Original Message- > From: Daniele Di Proietto [mailto:diproiet...@vmware.com] > Sent: Friday, July 22, 2016 3:49 PM > To: Flavio Leitner ; Vinod, Chegu > > Cc: kris...@redhat.com; ovs-dev ; Loftus, Ciara > > Subject: Re: Backport Request: dpif-netdev: Remove PMD latency on > seq_mutex > > I'm not sure I'm 100% comfortable back porting this to branch-2.5 > > I see the change more as a feature rather than a bugfix. > > Also it touches some core modules (seq and rcu) in a non trivial way. > > > What do you guys think? > > Thanks, > > Daniele > > On 22/07/2016 15:03, "Flavio Leitner" wrote: > > >(adding ovs-dev mailing list and more people interesting on the > >backport to CC) > > > >On Mon, Jul 18, 2016 at 05:31:52AM +, Vinod, Chegu wrote: > >> Hi Flavio, Karl, > >> > >> Is there a version of the following fix available that is compatible with > >> OVS > 2.5? > >> > >> > https://github.com/openvswitch/ovs/commit/9dede5cff553d7c4e074f04c52 > 5 > >> c1417eb209363 > >> > >> If yes can it backported to the 2.5 branch ? > > > >branch-2.5 is affected by the same issue. I tested the patch from > >branch master (cherry-pick) and it solved the issue. > > > >Daniele, > > > >What do you think? If you agree, do you need me to post the backported > >patch or is it enough for you to cherry-pick?
> > > >Thanks, > >-- > >fbl ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [ovs-dev, RFC, v2, 1/1] netdev-dpdk: Add support for DPDK 16.07
> > On 12.07.2016 12:11, Ciara Loftus wrote: > > This commit introduces support for DPDK 16.07 and consequently breaks > > compatibility with DPDK 16.04. > > > > DPDK 16.07 introduces some changes to various APIs. These have been > > updated in OVS, including: > > * xstats API: changes to structure of xstats > > * vhost API: replace virtio-net references with 'vid' > > > > Signed-off-by: Ciara Loftus > > --- > > .travis/linux-build.sh | 2 +- > > INSTALL.DPDK-ADVANCED.md | 8 +- > > INSTALL.DPDK.md | 20 ++-- > > lib/netdev-dpdk.c| 243 +++ > > > 4 files changed, 135 insertions(+), 138 deletions(-) > > > > diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh > > index 065de39..1b3d43d 100755 > > --- a/.travis/linux-build.sh > > +++ b/.travis/linux-build.sh > > @@ -68,7 +68,7 @@ fi > > > > if [ "$DPDK" ]; then > > if [ -z "$DPDK_VER" ]; then > > -DPDK_VER="16.04" > > +DPDK_VER="16.07" > > fi > > install_dpdk $DPDK_VER > > if [ "$CC" = "clang" ]; then > > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md > > index 9ae536d..ec1de29 100644 > > --- a/INSTALL.DPDK-ADVANCED.md > > +++ b/INSTALL.DPDK-ADVANCED.md > > @@ -43,7 +43,7 @@ for DPDK and OVS. 
> > For IVSHMEM case, set `export DPDK_TARGET=x86_64-ivshmem- > linuxapp-gcc` > > > > ``` > > -export DPDK_DIR=/usr/src/dpdk-16.04 > > +export DPDK_DIR=/usr/src/dpdk-16.07 > > export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET > > make install T=$DPDK_TARGET DESTDIR=install > > ``` > > @@ -339,7 +339,7 @@ For users wanting to do packet forwarding using > kernel stack below are the steps > > cd /usr/src/cmdline_generator > > wget https://raw.githubusercontent.com/netgroup-polito/un- > orchestrator/master/orchestrator/compute_controller/plugins/kvm- > libvirt/cmdline_generator/cmdline_generator.c > > wget https://raw.githubusercontent.com/netgroup-polito/un- > orchestrator/master/orchestrator/compute_controller/plugins/kvm- > libvirt/cmdline_generator/Makefile > > - export RTE_SDK=/usr/src/dpdk-16.04 > > + export RTE_SDK=/usr/src/dpdk-16.07 > > export RTE_TARGET=x86_64-ivshmem-linuxapp-gcc > > make > > ./build/cmdline_generator -m -p dpdkr0 XXX > > @@ -363,7 +363,7 @@ For users wanting to do packet forwarding using > kernel stack below are the steps > > mount -t hugetlbfs nodev /dev/hugepages (if not already mounted) > > > > # Build the DPDK ring application in the VM > > - export RTE_SDK=/root/dpdk-16.04 > > + export RTE_SDK=/root/dpdk-16.07 > > export RTE_TARGET=x86_64-ivshmem-linuxapp-gcc > > make > > > > @@ -374,7 +374,7 @@ For users wanting to do packet forwarding using > kernel stack below are the steps > > > > ## 6. Vhost Walkthrough > > > > -DPDK 16.04 supports two types of vhost: > > +DPDK 16.07 supports two types of vhost: > > > > 1. vhost-user - enabled default > > > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > > index 5407794..9022ad8 100644 > > --- a/INSTALL.DPDK.md > > +++ b/INSTALL.DPDK.md > > @@ -21,7 +21,7 @@ The DPDK support of Open vSwitch is considered > 'experimental'. > > > > ### Prerequisites > > > > -* Required: DPDK 16.04, libnuma > > +* Required: DPDK 16.07, libnuma > > * Hardware: [DPDK Supported NICs] when physical ports in use > > > > ## 2. 
Building and Installation > > @@ -42,10 +42,10 @@ advanced install guide [INSTALL.DPDK- > ADVANCED.md] > > > > ``` > > cd /usr/src/ > > - wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.04.zip > > - unzip dpdk-16.04.zip > > + wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.07.zip > > + unzip dpdk-16.07.zip > > > > - export DPDK_DIR=/usr/src/dpdk-16.04 > > + export DPDK_DIR=/usr/src/dpdk-16.07 > > cd $DPDK_DIR > > ``` > > > > @@ -329,9 +329,9 @@ can be found in [Vhost Walkthrough]. > > > >``` > >cd /root/dpdk/ > > - wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.04.zip > > - unzip dpdk-16.04.zip > > - export DPDK_DIR=/root/dpdk/dpdk-16.04 > > + wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.07.zip > > + unzip dpdk-16.07.zip > > + export DPDK_DIR=/root/dpdk/dpdk-16.07 > >export DPDK_TARGET=x86_64-native-linuxapp-gcc > >export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET > >cd $DPDK_DIR > > @@ -487,7 +487,7 @@ can be found in [Vhost Walkthrough]. > > > > > > > > - > > + > > > > > > > > @@ -557,9 +557,9 @@ can be found in [Vhost Walkthrough]. > > DPDK. It is recommended that users update Network Interface firmware > to > > match what has been validated for the DPDK release. > > > > -For DPDK 16.04, the list of validated firmware versions can be found > > at: > > +For DPDK 16.07, the list of validated firmware versions can be found > > at: >
Re: [ovs-dev] [RFC PATCH v2 1/1] netdev-dpdk: Add support for DPDK 16.07
> > Hi Ciara, > thanks for the patch. > It mostly looks good to me, except a few comments inline > Thanks, > Daniele Thanks for the review Daniele. I've pushed a new version that includes your suggestions. Responses inline. > > 2016-07-12 2:11 GMT-07:00 Ciara Loftus : > This commit introduces support for DPDK 16.07 and consequently breaks > compatibility with DPDK 16.04. > > DPDK 16.07 introduces some changes to various APIs. These have been > updated in OVS, including: > * xstats API: changes to structure of xstats > * vhost API: replace virtio-net references with 'vid' > > Signed-off-by: Ciara Loftus > > --- > .travis/linux-build.sh | 2 +- > INSTALL.DPDK-ADVANCED.md | 8 +- > INSTALL.DPDK.md | 20 ++-- > lib/netdev-dpdk.c | 243 +++--- > - > 4 files changed, 135 insertions(+), 138 deletions(-) > > diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh > index 065de39..1b3d43d 100755 > --- a/.travis/linux-build.sh > +++ b/.travis/linux-build.sh > @@ -68,7 +68,7 @@ fi > > if [ "$DPDK" ]; then > if [ -z "$DPDK_VER" ]; then > - DPDK_VER="16.04" > + DPDK_VER="16.07" > I wanted to test it on travis, but the files are not there yet :) > > fi > install_dpdk $DPDK_VER > if [ "$CC" = "clang" ]; then > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md > index 9ae536d..ec1de29 100644 > --- a/INSTALL.DPDK-ADVANCED.md > +++ b/INSTALL.DPDK-ADVANCED.md > @@ -43,7 +43,7 @@ for DPDK and OVS. 
> For IVSHMEM case, set `export DPDK_TARGET=x86_64-ivshmem- > linuxapp-gcc` > > ``` > - export DPDK_DIR=/usr/src/dpdk-16.04 > + export DPDK_DIR=/usr/src/dpdk-16.07 > export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET > make install T=$DPDK_TARGET DESTDIR=install > ``` > @@ -339,7 +339,7 @@ For users wanting to do packet forwarding using > kernel stack below are the steps > cd /usr/src/cmdline_generator > wget https://raw.githubusercontent.com/netgroup-polito/un- > orchestrator/master/orchestrator/compute_controller/plugins/kvm- > libvirt/cmdline_generator/cmdline_generator.c > wget https://raw.githubusercontent.com/netgroup-polito/un- > orchestrator/master/orchestrator/compute_controller/plugins/kvm- > libvirt/cmdline_generator/Makefile > - export RTE_SDK=/usr/src/dpdk-16.04 > + export RTE_SDK=/usr/src/dpdk-16.07 > export RTE_TARGET=x86_64-ivshmem-linuxapp-gcc > make > ./build/cmdline_generator -m -p dpdkr0 XXX > @@ -363,7 +363,7 @@ For users wanting to do packet forwarding using > kernel stack below are the steps > mount -t hugetlbfs nodev /dev/hugepages (if not already mounted) > > # Build the DPDK ring application in the VM > - export RTE_SDK=/root/dpdk-16.04 > + export RTE_SDK=/root/dpdk-16.07 > export RTE_TARGET=x86_64-ivshmem-linuxapp-gcc > make > > @@ -374,7 +374,7 @@ For users wanting to do packet forwarding using > kernel stack below are the steps > > ## 6. Vhost Walkthrough > > -DPDK 16.04 supports two types of vhost: > +DPDK 16.07 supports two types of vhost: > > 1. vhost-user - enabled default > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > index 5407794..9022ad8 100644 > --- a/INSTALL.DPDK.md > +++ b/INSTALL.DPDK.md > @@ -21,7 +21,7 @@ The DPDK support of Open vSwitch is considered > 'experimental'. > > ### Prerequisites > > -* Required: DPDK 16.04, libnuma > +* Required: DPDK 16.07, libnuma > * Hardware: [DPDK Supported NICs] when physical ports in use > > ## 2. 
Building and Installation > @@ -42,10 +42,10 @@ advanced install guide [INSTALL.DPDK- > ADVANCED.md] > > ``` > cd /usr/src/ > - wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.04.zip > - unzip dpdk-16.04.zip > + wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.07.zip > + unzip dpdk-16.07.zip > > - export DPDK_DIR=/usr/src/dpdk-16.04 > + export DPDK_DIR=/usr/src/dpdk-16.07 > cd $DPDK_DIR > ``` > > @@ -329,9 +329,9 @@ can be found in [Vhost Walkthrough]. > > ``` > cd /root/dpdk/ > - wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.04.zip > - unzip dpdk-16.04.zip > - export DPDK_DIR=/root/dpdk/dpdk-16.04 > + wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.07.zip > + unzip dpdk-16.07.zip > + export DPDK_DIR=/root/dpdk/dpdk-16.07 > export DPDK_TARGET=x86_64-native-linuxapp-gcc > export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET > cd $DPDK_DIR > @@ -487,7 +487,7 @@ can be found in [Vhost Walkthrough]. > > > > - > + > > > > @@ -557,9 +557,9 @@ can be found in [Vhost Walkthrough]. > DPDK. It is recommended that users update Network Interface firmware > to > match what has been validated for the DPDK release. > > - For DPDK 16.04, the list of validated firmware versions can be found at: > + For DPDK 16
Re: [ovs-dev] [PATCH RFC v3 1/1] netdev-dpdk: Arbitrary 'dpdk' port naming
> > The idea looks very good to me, thanks for working on it. > Very high level comments: Hi Daniele thanks for looking at this. > > Do we need to be limited to pci devices? Perhaps we can accept the same > string as rte_eth_dev_attach(). Can you elaborate? For physical devs the string is always the PCI address. Do you mean to include virtual devices as well? This could be an option once we can use the ethdev API with vHost ports if the PMD gets merged. > Would it be possible to integrate this more with the hotplug patch? It would > be nice to avoid introducing extra appctl commands and call > rte_eth_dev_attach() if needed in netdev_dpdk_construct(). Good idea. I'll look at this for the v4. Thanks, Ciara > Thoughts? > Thanks, > Daniele > > 2016-07-15 9:34 GMT-07:00 Ciara Loftus : > 'dpdk' ports no longer have naming restrictions. Now, instead > of specifying the dpdk port ID as part of the name, the PCI > address of the device must be specified via the 'dpdk-pci' > option. eg. > > ovs-vsctl add-port br0 my-port > ovs-vsctl set Interface my-port type=dpdk > ovs-vsctl set Interface my-port options:dpdk-pci=:06:00.3 > > Signed-off-by: Ciara Loftus > > v2: > - remove global pci list > - remove unnecessary parenthesis > - remove return from void fn > - print pci like dpdk > - fix port ranges > --- > INSTALL.DPDK-ADVANCED.md | 2 +- > INSTALL.DPDK.md | 10 ++-- > NEWS | 2 + > lib/netdev-dpdk.c | 132 > ++- > 4 files changed, 116 insertions(+), 30 deletions(-) > > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md > index 61b4e82..7370d03 100644 > --- a/INSTALL.DPDK-ADVANCED.md > +++ b/INSTALL.DPDK-ADVANCED.md > @@ -854,7 +854,7 @@ At this point, the user can create a ovs port using the > add-port command. 
> It is also possible to detach a port from ovs, the user has to remove the > port using the del-port command, then it can be detached using: > > -`ovs-appctl netdev-dpdk/port-detach dpdk0` > +`ovs-appctl netdev-dpdk/port-detach :01:00.0` > > This feature is not supported with VFIO and could not work with some NICs, > please refer to the [DPDK Port Hotplug Framework] in order to get more > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > index 5407794..9a781ff 100644 > --- a/INSTALL.DPDK.md > +++ b/INSTALL.DPDK.md > @@ -258,13 +258,13 @@ advanced install guide [INSTALL.DPDK- > ADVANCED.md] > > `ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev` > > - Now you can add DPDK devices. OVS expects DPDK device names to start > with > - "dpdk" and end with a portid. vswitchd should print (in the log file) > the > - number of dpdk devices found. > + Now you can add dpdk devices. The PCI address of the device needs to > be > + set using the 'dpdk-pci' option. vswitchd should print (in the log file) > + the PCI addresses of dpdk devices found during initialisation. > > ``` > - ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk > - ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk > + ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk > options:dpdk-pci=:06:00.0 > + ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk > options:dpdk-pci=:06:00.1 > ``` > > After the DPDK ports get added to switch, a polling thread continuously > polls > diff --git a/NEWS b/NEWS > index 9064225..03b9ba8 100644 > --- a/NEWS > +++ b/NEWS > @@ -59,6 +59,8 @@ Post-v2.5.0 > node that device memory is located on if > CONFIG_RTE_LIBRTE_VHOST_NUMA > is enabled in DPDK. > * Port Hotplug is now supported. > + * DPDK physical ports can now have arbitrary names. The PCI address of > + the device must be set using the 'dpdk-pci' option. > - Increase number of registers to 16. > - ovs-benchmark: This utility has been removed due to lack of use and > bitrot. 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 3fab52c..d2cceb2 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -58,6 +58,7 @@ > #include "rte_config.h" > #include "rte_mbuf.h" > #include "rte_meter.h" > +#include "rte_pci.h" > #include "rte_virtio_net.h" > > VLOG_DEFINE_THIS_MODULE(dpdk); > @@ -736,7 +737,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned > int port_no, > /* If the 'sid' is negative, it means that the kernel fails > * to obtain the pci numa info. In that situation, always > * use 'SOCKET0'. */ > - if (type == DPDK_DEV_ETH) { > + if (type == DPDK_DEV_ETH && dev->port_id != -1) { > sid = rte_eth_dev_socket_id(port_no); > } else { > sid = rte_lcore_to_socket_id(rte_get_master_lcore()); > @@ -772,9 +773,11 @@ netdev_dpdk_init(struct netdev *netdev, unsigned > int port_no, > dev->requested_n_txq = netdev->n_txq; > > if (type == DPDK_DEV_ETH) { > - err = dpdk_eth_dev_init(dev); > -
Re: [ovs-dev] [RFC PATCH v2 0/1] netdev-dpdk: Add support for DPDK 16.07
> > Hi Ciara, > > On 07/12/2016 11:11 AM, Ciara Loftus wrote: > > This RFC patch provides initial support for DPDK 16.07-rc2 (release > > candidate 2) which was tagged July 12th. I plan to submit a full patch > > once the 16.07 release is final, hopefully later this month. This RFC > > patch can be used in meantime to test with the DPDK master branch or > > 16.07-rc2 tag. > > > > The main changes between DPDK 16.04 and 16.07 so far that impact OVS > are > > changes to the xstats and vhost APIs which have required rework to those > > particular sections in netdev-dpdk.c. > > > > As this is an RFC patch only basic testing has been conducted. If you > > try this patch out and find any issues please report them and fixes can > > be included in the final patch. > > > > v2: > > - rebase with DPDK rc2 > > - rebase with OVS master > > - fix vhost cuse compilation > > > > .travis/linux-build.sh | 2 +- > > INSTALL.DPDK-ADVANCED.md | 8 +- > > INSTALL.DPDK.md | 20 ++-- > > lib/netdev-dpdk.c| 243 +++ > > > 4 files changed, 135 insertions(+), 138 deletions(-) > > > > FWIW, you can add my: > Tested-by: Maxime Coquelin Will do. Thanks for testing! Ciara > > Thanks! > Maxime ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [RFC PATCH v2 1/1] netdev-dpdk: Arbitrary 'dpdk' port naming
> On Fri, Jul 15, 2016 at 11:00 AM, Loftus, Ciara > wrote: > > > > Hello Ciara, > > I like the idea a lot, the restriction on the names has always been a > limitation, > > however, it is more important the port id to physical port relation that is > > confusing. > > I was not able to test the patch, it does not apply and I didn't have the > > time > > to apply it manually. > > Thanks for the review Mauricio! > The latest hotplug patch fails to apply also. Do you plan to submit a new > version soon? Once you do I'll rework this according to your suggestions and > send out a v3. > > I sent v7 rebased to master: http://openvswitch.org/pipermail/dev/2016- > July/075350.html Thanks Mauricio. I rebased mine on your v7, addressed your review comments and sent a v3: http://openvswitch.org/pipermail/dev/2016-July/075386.html Thanks, Ciara > Thanks, > Mauricio V > > Thanks, > Ciara > > > > > I have some comments inline. > > > > On Fri, Jul 1, 2016 at 11:29 AM, Ciara Loftus > > wrote: > > 'dpdk' ports no longer have naming restrictions. Now, instead > > of specifying the dpdk port ID as part of the name, the PCI > > address of the device must be specified via the 'dpdk-pci' > > option. eg. > > > > ovs-vsctl add-port br0 my-port > > ovs-vsctl set Interface my-port type=dpdk > > ovs-vsctl set Interface my-port options:dpdk-pci=:06:00.3 > > > > Signed-off-by: Ciara Loftus > > --- > > INSTALL.DPDK.md | 12 ++--- > > NEWS | 2 + > > lib/netdev-dpdk.c | 142 > > +- > > 3 files changed, 127 insertions(+), 29 deletions(-) > > > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > > index 28c5b90..ad2fcbf 100644 > > --- a/INSTALL.DPDK.md > > +++ b/INSTALL.DPDK.md > > @@ -208,13 +208,13 @@ Using the DPDK with ovs-vswitchd: > > > > `ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev` > > > > - Now you can add dpdk devices. OVS expects DPDK device names to start > > with > > - "dpdk" and end with a portid. 
vswitchd should print (in the log file) > > the > > - number of dpdk devices found. > > + Now you can add dpdk devices. The PCI address of the device needs to > be > > + set using the 'dpdk-pci' option. vswitchd should print (in the log file) > > + the number and PCI addresses of dpdk devices found during > initialisation. > > > > ``` > > - ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk > > - ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk > > + ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk > > options:dpdk-pci=:06:00.0 > > + ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk > > options:dpdk-pci=:06:00.1 > > ``` > > > > Once first DPDK port is added to vswitchd, it creates a Polling thread > > and > > @@ -304,7 +304,7 @@ Using the DPDK with ovs-vswitchd: > > It is also possible to detach a port from ovs, the user has to remove > > the > > port using the del-port command, then it can be detached using: > > > > - `ovs-appctl netdev-dpdk/port-detach dpdk0` > > + `ovs-appctl netdev-dpdk/port-detach :01:00.0` > > > > This feature is not supported with VFIO and could not work with some > > NICs, > > please refer to the [DPDK Port Hotplug Framework] in order to get more > > diff --git a/NEWS b/NEWS > > index a1146b0..db702b7 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -49,6 +49,8 @@ Post-v2.5.0 > > node that device memory is located on if > > CONFIG_RTE_LIBRTE_VHOST_NUMA > > is enabled in DPDK. > > * Port Hotplug is now supported. > > + * DPDK physical ports can now have arbitrary names. The PCI address > of > > + the device must be set using the 'dpdk-pci' option. > > - ovs-benchmark: This utility has been removed due to lack of use and > > bitrot. 
> > - ovs-appctl: > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index 857339a..8e69f3a 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -144,6 +144,10 @@ static char *vhost_sock_dir = NULL; /* Location of > > vhost-user sockets */ > > > > #define VHOST_ENQ_RETRY_NUM 8 > > > > +static uint8_t nb_ports; /* Number of DPDK ports initialised */ > > +struct rte_pci_addr pci_devs[RTE_MAX_ETHPORTS]; /* PCI info of >
Re: [ovs-dev] [PATCH] netdev-dpdk : vhost-user port link state fix
> > I changed 'netdev' to 'dev' in netdev_dpdk_init(), added Jan to AUTHORS > and > pushed this to master. Another backport request - could this be pushed to 2.5 too? Thanks, Ciara > > Thanks! > > Daniele > > 2016-06-02 5:42 GMT-07:00 Zoltán Balogh : > > > Hi Daniele, > > > > I fixed the patch based on your comments: > > > > OVS reports that link state of a vhost-user port (type=dpdkvhostuser) is > > DOWN, even when traffic is running through the port between a Virtual > > Machine and the vSwitch. > > Changing admin state with the "ovs-ofctl mod-port > up/down" > > command over OpenFlow affects neither the reported link state nor > the > > traffic. > > > > The patch below does the following: > > - Triggers link state change by altering netdev's change_seq member. > > - Controls sending/receiving of packets through vhost-user port according > > to the port's current admin state. > > - Sets admin state of newly created vhost-user port to UP. > > > > Signed-off-by: Zoltán Balogh > > Co-authored-by: Jan Scheurich > > Signed-off-by: Jan Scheurich > > > > --- > > > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index 6cae930..9473bdb 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -797,6 +797,8 @@ netdev_dpdk_init(struct netdev *netdev, unsigned > int > > port_no, > > } > > } else { > > netdev_dpdk_alloc_txq(dev, OVS_VHOST_MAX_QUEUE_NUM); > > +/* Enable DPDK_DEV_VHOST device and set promiscuous mode flag. 
> */ > > +netdev->flags = NETDEV_UP | NETDEV_PROMISC; > > > } > > > > ovs_list_push_back(&dpdk_list, &dev->list_node); > > @@ -1256,7 +1258,8 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq > *rxq, > > uint16_t nb_rx = 0; > > uint16_t dropped = 0; > > > > -if (OVS_UNLIKELY(!is_vhost_running(virtio_dev))) { > > +if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) > > + || !(dev->flags & NETDEV_UP))) { > > return EAGAIN; > > } > > > > @@ -1378,7 +1381,8 @@ __netdev_dpdk_vhost_send(struct netdev > *netdev, int > > qid, > > > > qid = dev->tx_q[qid % dev->real_n_txq].map; > > > > -if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) || qid < 0)) { > > +if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) || qid < 0 > > + || !(dev->flags & NETDEV_UP))) { > > rte_spinlock_lock(&dev->stats_lock); > > dev->stats.tx_dropped+= cnt; > > rte_spinlock_unlock(&dev->stats_lock); > > @@ -2117,6 +2121,23 @@ netdev_dpdk_update_flags__(struct > netdev_dpdk *dev, > > if (!(dev->flags & NETDEV_UP)) { > > rte_eth_dev_stop(dev->port_id); > > } > > +} else { > > +/* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed and > > vhost is > > + * running then change netdev's change_seq to trigger link state > > + * update. */ > > +struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev); > > + > > +if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off))) > > +&& is_vhost_running(virtio_dev)) { > > +netdev_change_seq_changed(&dev->up); > > + > > +/* Clear statistics if device is getting up. */ > > +if (NETDEV_UP & on) { > > +rte_spinlock_lock(&dev->stats_lock); > > +memset(&dev->stats, 0, sizeof(dev->stats)); > > +rte_spinlock_unlock(&dev->stats_lock); > > +} > > +} > > } > > > > return 0; > > @@ -2339,6 +2360,7 @@ new_device(struct virtio_net *virtio_dev) > > virtio_dev->flags |= VIRTIO_DEV_RUNNING; > > /* Disable notifications. 
*/ > > set_irq_status(virtio_dev); > > +netdev_change_seq_changed(&dev->up); > > ovs_mutex_unlock(&dev->mutex); > > break; > > } > > @@ -2390,6 +2412,7 @@ destroy_device(volatile struct virtio_net > > *virtio_dev) > > ovsrcu_set(&dev->virtio_dev, NULL); > > netdev_dpdk_txq_map_clear(dev); > > exists = true; > > +netdev_change_seq_changed(&dev->up); > > ovs_mutex_unlock(&dev->mutex); > > break; > > } > > > > > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH v4] dpif-netdev: Remove PMD latency on seq_mutex
> > I applied this to master with the below incremental. > > We _usually_ use positive error numbers in int return value. > > I think there was an extra COVERAGE_INC(seq_change) > > Thanks for the patch! Hi, Could this be backported to the 2.5 branch? Thanks, Ciara > > diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c > index 7462579..8aef1f1 100644 > --- a/lib/ovs-rcu.c > +++ b/lib/ovs-rcu.c > @@ -157,7 +157,7 @@ int > ovsrcu_try_quiesce(void) > { > struct ovsrcu_perthread *perthread; > -int ret = -EBUSY; > +int ret = EBUSY; > > ovs_assert(!single_threaded()); > perthread = ovsrcu_perthread_get(); > diff --git a/lib/seq.c b/lib/seq.c > index 4e99c6c..b8b5b65 100644 > --- a/lib/seq.c > +++ b/lib/seq.c > @@ -138,8 +138,6 @@ void > seq_change(struct seq *seq) > OVS_EXCLUDED(seq_mutex) > { > -COVERAGE_INC(seq_change); > - > ovs_mutex_lock(&seq_mutex); > seq_change_protected(seq); > ovs_mutex_unlock(&seq_mutex); > > > > 2016-07-05 6:33 GMT-07:00 Flavio Leitner : > > > The PMD thread needs to keep processing RX queues in order > > to achieve maximum throughput. It also needs to sweep emc > > cache and quiesce which use seq_mutex. That mutex can > > eventually block the PMD thread causing latency spikes and > > affecting the throughput. > > > > Since there is no requirement for running those tasks at a > > specific time, this patch extend seq API to allow tentative > > locking instead. > > > > Reported-by: Karl Rister > > Co-authored-by: Karl Rister > > Signed-off-by: Flavio Leitner > > --- > > lib/dpif-netdev.c | 5 +++-- > > lib/ovs-rcu.c | 37 +++-- > > lib/ovs-rcu.h | 1 + > > lib/seq.c | 49 > ++--- > > lib/seq.h | 5 + > > 5 files changed, 90 insertions(+), 7 deletions(-) > > > > v4: > >- return EBUSY if lock is busy. > > > > v3: > >- addressed clang annotation feedbacks from Daniele > >- tested over 4 days without spikes or other issues > > > > v2: > >- expanded SEQ API instead of using recursive lock. 
> > > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > > index 37c2631..dc5b02e 100644 > > --- a/lib/dpif-netdev.c > > +++ b/lib/dpif-netdev.c > > @@ -2869,9 +2869,10 @@ reload: > > > > lc = 0; > > > > -emc_cache_slow_sweep(&pmd->flow_cache); > > coverage_try_clear(); > > -ovsrcu_quiesce(); > > +if (!ovsrcu_try_quiesce()) { > > +emc_cache_slow_sweep(&pmd->flow_cache); > > +} > > > > atomic_read_relaxed(&pmd->change_seq, &seq); > > if (seq != port_seq) { > > diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c > > index 0ff508e..7462579 100644 > > --- a/lib/ovs-rcu.c > > +++ b/lib/ovs-rcu.c > > @@ -15,6 +15,7 @@ > > */ > > > > #include > > +#include > > #include "ovs-rcu.h" > > #include "fatal-signal.h" > > #include "guarded-list.h" > > @@ -57,6 +58,7 @@ static struct guarded_list flushed_cbsets; > > static struct seq *flushed_cbsets_seq; > > > > static void ovsrcu_init_module(void); > > +static void ovsrcu_flush_cbset__(struct ovsrcu_perthread *, bool); > > static void ovsrcu_flush_cbset(struct ovsrcu_perthread *); > > static void ovsrcu_unregister__(struct ovsrcu_perthread *); > > static bool ovsrcu_call_postponed(void); > > @@ -151,6 +153,27 @@ ovsrcu_quiesce(void) > > ovsrcu_quiesced(); > > } > > > > +int > > +ovsrcu_try_quiesce(void) > > +{ > > +struct ovsrcu_perthread *perthread; > > +int ret = -EBUSY; > > + > > +ovs_assert(!single_threaded()); > > +perthread = ovsrcu_perthread_get(); > > +if (!seq_try_lock()) { > > +perthread->seqno = seq_read_protected(global_seqno); > > +if (perthread->cbset) { > > +ovsrcu_flush_cbset__(perthread, true); > > +} > > +seq_change_protected(global_seqno); > > +seq_unlock(); > > +ovsrcu_quiesced(); > > +ret = 0; > > +} > > +return ret; > > +} > > + > > bool > > ovsrcu_is_quiescent(void) > > { > > @@ -292,7 +315,7 @@ ovsrcu_postpone_thread(void *arg OVS_UNUSED) > > } > > > > static void > > -ovsrcu_flush_cbset(struct ovsrcu_perthread *perthread) > > +ovsrcu_flush_cbset__(struct ovsrcu_perthread *perthread, bool > protected) > > { > > 
struct ovsrcu_cbset *cbset = perthread->cbset; > > > > @@ -300,11 +323,21 @@ ovsrcu_flush_cbset(struct ovsrcu_perthread > > *perthread) > > guarded_list_push_back(&flushed_cbsets, &cbset->list_node, > > SIZE_MAX); > > perthread->cbset = NULL; > > > > -seq_change(flushed_cbsets_seq); > > +if (protected) { > > +seq_change_protected(flushed_cbsets_seq); > > +} else { > > +seq_change(flushed_cbsets_seq); > > +} > > } > > } > > > > static void > > +ovsrcu_flush_cbset(struct ovsrcu_perthread *perthread) > > +{ > > +ovsrcu_flush_cbset__(perthread, fal
Re: [ovs-dev] [RFC PATCH v2 1/1] netdev-dpdk: Arbitrary 'dpdk' port naming
> > Hello Ciara, > I like the idea a lot, the restriction on the names has always been a > limitation, > however, it is more important the port id to physical port relation that is > confusing. > I was not able to test the patch, it does not apply and I didn't have the time > to apply it manually. Thanks for the review Mauricio! The latest hotplug patch fails to apply also. Do you plan to submit a new version soon? Once you do I'll rework this according to your suggestions and send out a v3. Thanks, Ciara > > I have some comments inline. > > On Fri, Jul 1, 2016 at 11:29 AM, Ciara Loftus wrote: > 'dpdk' ports no longer have naming restrictions. Now, instead > of specifying the dpdk port ID as part of the name, the PCI > address of the device must be specified via the 'dpdk-pci' > option. eg. > > ovs-vsctl add-port br0 my-port > ovs-vsctl set Interface my-port type=dpdk > ovs-vsctl set Interface my-port options:dpdk-pci=:06:00.3 > > Signed-off-by: Ciara Loftus > --- > INSTALL.DPDK.md | 12 ++--- > NEWS | 2 + > lib/netdev-dpdk.c | 142 > +- > 3 files changed, 127 insertions(+), 29 deletions(-) > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > index 28c5b90..ad2fcbf 100644 > --- a/INSTALL.DPDK.md > +++ b/INSTALL.DPDK.md > @@ -208,13 +208,13 @@ Using the DPDK with ovs-vswitchd: > > `ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev` > > - Now you can add dpdk devices. OVS expects DPDK device names to start > with > - "dpdk" and end with a portid. vswitchd should print (in the log file) the > - number of dpdk devices found. > + Now you can add dpdk devices. The PCI address of the device needs to be > + set using the 'dpdk-pci' option. vswitchd should print (in the log file) > + the number and PCI addresses of dpdk devices found during initialisation. 
> > ``` > - ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk > - ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk > + ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk > options:dpdk-pci=:06:00.0 > + ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk > options:dpdk-pci=:06:00.1 > ``` > > Once first DPDK port is added to vswitchd, it creates a Polling thread and > @@ -304,7 +304,7 @@ Using the DPDK with ovs-vswitchd: > It is also possible to detach a port from ovs, the user has to remove the > port using the del-port command, then it can be detached using: > > - `ovs-appctl netdev-dpdk/port-detach dpdk0` > + `ovs-appctl netdev-dpdk/port-detach :01:00.0` > > This feature is not supported with VFIO and could not work with some > NICs, > please refer to the [DPDK Port Hotplug Framework] in order to get more > diff --git a/NEWS b/NEWS > index a1146b0..db702b7 100644 > --- a/NEWS > +++ b/NEWS > @@ -49,6 +49,8 @@ Post-v2.5.0 > node that device memory is located on if > CONFIG_RTE_LIBRTE_VHOST_NUMA > is enabled in DPDK. > * Port Hotplug is now supported. > + * DPDK physical ports can now have arbitrary names. The PCI address of > + the device must be set using the 'dpdk-pci' option. > - ovs-benchmark: This utility has been removed due to lack of use and > bitrot. > - ovs-appctl: > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 857339a..8e69f3a 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -144,6 +144,10 @@ static char *vhost_sock_dir = NULL; /* Location of > vhost-user sockets */ > > #define VHOST_ENQ_RETRY_NUM 8 > > +static uint8_t nb_ports; /* Number of DPDK ports initialised */ > +struct rte_pci_addr pci_devs[RTE_MAX_ETHPORTS]; /* PCI info of initialised > DPDK > + devices */ > + > > Is this array necessary? What about always getting it from DPDK? 
> > static const struct rte_eth_conf port_conf = { > .rxmode = { > .mq_mode = ETH_MQ_RX_RSS, > @@ -757,7 +761,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned > int port_no, > /* If the 'sid' is negative, it means that the kernel fails > * to obtain the pci numa info. In that situation, always > * use 'SOCKET0'. */ > - if (type == DPDK_DEV_ETH) { > + if (type == DPDK_DEV_ETH && (dev->port_id != -1)) { > > The parenthesis around dev->port_id != -1 are not necessary. > sid = rte_eth_dev_socket_id(port_no); > } else { > sid = rte_lcore_to_socket_id(rte_get_master_lcore()); > @@ -795,9 +799,11 @@ netdev_dpdk_init(struct netdev *netdev, unsigned > int port_no, > > if (type == DPDK_DEV_ETH) { > netdev_dpdk_alloc_txq(dev, NR_QUEUE); > - err = dpdk_eth_dev_init(dev); > - if (err) { > - goto unlock; > + if (dev->port_id != -1) { > + err = dpdk_eth_dev_init(dev); > + if (err) { > + goto unlock; > + } > } > } els
Re: [ovs-dev] [RFC PATCH 1/1] netdev-dpdk: vHost client mode and reconnect
> Ciara Loftus writes: > > > A new other_config DB option has been added called 'vhost_driver_mode'. > > By default this is set to 'server' which is the mode of operation OVS > > with DPDK has used up until this point - whereby OVS creates and manages > > vHost user sockets. > > > > If set to 'client', OVS will act as the vHost client and connect to > > sockets created and managed by QEMU which acts as the server. This > mode > > allows for reconnect capability, which allows vHost ports to resume > > normal connectivity in event of switch reset. > > > > QEMU v2.7.0+ is required when using OVS in client mode and QEMU in > > server mode. > > > > Signed-off-by: Ciara Loftus > > --- > > INSTALL.DPDK-ADVANCED.md | 27 +++ > > NEWS | 1 + > > lib/netdev-dpdk.c| 37 ++--- > > vswitchd/vswitch.xml | 13 + > > 4 files changed, 67 insertions(+), 11 deletions(-) > > > > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md > > index ec1de29..ad3e59e 100644 > > --- a/INSTALL.DPDK-ADVANCED.md > > +++ b/INSTALL.DPDK-ADVANCED.md > > @@ -489,6 +489,33 @@ DPDK 16.07 supports two types of vhost: > > where `-L`: Changes the numbers of channels of the specified network > device > > and `combined`: Changes the number of multi-purpose channels. > > > > +4. Enable OVS vHost client-mode & vHost reconnect (OPTIONAL) > > + > > + By default, OVS DPDK acts as the vHost socket server and QEMU the > > + client. In QEMU v2.7 the option is available for QEMU to act as the > > + server. In order for this to work, OVS DPDK must be switched to > > 'client' > > + mode. This is possible by setting the 'vhost_driver_mode' DB entry > > to > > + 'client' like so: > > + > > + ``` > > + ovs-vsctl set Open_vSwitch . > other_config:vhost_driver_mode="client" > > + ``` > > + > > + This must be done before the switch is launched. It cannot > > sucessfully > > + be changed after switch has launched. 
> > + > > + One must also append ',server' to the 'chardev' arguments on the > QEMU > > + command line, to instruct QEMU to use vHost server mode, like so: > > + > > + > > + -chardev > socket,id=char0,path=/usr/local/var/run/openvswitch/vhost0,server > > + > > + > > + One benefit of using this mode is the ability for vHost ports to > > + 'reconnect' in event of the switch crashing or being brought down. > Once > > + it is brought back up, the vHost ports will reconnect automatically > > and > > + normal service will resume. > > + > >- VM Configuration with libvirt > > > > * change the user/group, access control policty and restart libvirtd. > > diff --git a/NEWS b/NEWS > > index f50b05e..08bac37 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -53,6 +53,7 @@ Post-v2.5.0 > > * PMD threads servicing vHost User ports can now come from the > NUMA > > node that device memory is located on if > CONFIG_RTE_LIBRTE_VHOST_NUMA > > is enabled in DPDK. > > + * OVS client mode for vHost and vHost reconnect (Requires QEMU 2.7) > > - ovs-benchmark: This utility has been removed due to lack of use and > > bitrot. > > - ovs-appctl: > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index 9cf0b0c..6763039 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -138,9 +138,11 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / > ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF)) > >* yet mapped to another queue. */ > > > > #ifdef VHOST_CUSE > > -static char *cuse_dev_name = NULL;/* Character device > cuse_dev_name. */ > > +static char *cuse_dev_name = NULL; /* Character device > cuse_dev_name. 
*/ > > +#else > > +static char *vhost_sock_dir = NULL; /* Location of vhost-user sockets > > */ > > +static uint64_t vhost_driver_flags = 0; /* Denote whether client/server > mode */ > > #endif > > -static char *vhost_sock_dir = NULL; /* Location of vhost-user sockets */ > > > > #define VHOST_ENQ_RETRY_NUM 8 > > > > @@ -845,7 +847,6 @@ netdev_dpdk_vhost_user_construct(struct netdev > *netdev) > > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > > const char *name = netdev->name; > > int err; > > -uint64_t flags = 0; > > > > /* 'name' is appended to 'vhost_sock_dir' and used to create a socket > > in > > * the file system. '/' or '\' would traverse directories, so they're > > not > > @@ -868,14 +869,17 @@ netdev_dpdk_vhost_user_construct(struct > netdev *netdev) > > snprintf(dev->vhost_id, sizeof(dev->vhost_id), "%s/%s", > > vhost_sock_dir, name); > > > > -err = rte_vhost_driver_register(dev->vhost_id, flags); > > +err = rte_vhost_driver_register(dev->vhost_id, vhost_driver_flags); > > if (err) { > > VLOG_ERR("vhost-user socket device setup failure f
Re: [ovs-dev] [RFC PATCH 1/1] netdev-dpdk: Add support for DPDK 16.07
> > Hi Ciara, > > Ciara Loftus writes: > > > This commit introduces support for DPDK 16.07 and consequently breaks > > compatibility with DPDK 16.04. > > > > DPDK 16.07 introduces some changes to various APIs. These have been > > updated in OVS, including: > > * xstats API: changes to structure of xstats > > * vhost API: replace virtio-net references with 'vid' > > > > Signed-off-by: Ciara Loftus > > --- > > Thanks for this patch - I started work on a similar patchset. Have you > considered changing the stats lookup design so that we only strcmp once > at initialization and then use the stats id to do the actual stats > processing? If you think it's worthwhile, I would gladly donate some code to > the cause :-) Hi Aaron, You're welcome. Certainly, I thought there must be a better way to handle the new xstats but since we're at RFC I didn't pursue much of an investigation. If you have some code to donate I'll happily merge it into the patch and add you as co-author :-) I will probably wait until another release candidate to upload a v2. Thanks, Ciara > > Otherwise it looks good. Thanks for posting it ahead of the DPDK > release, so we can evaluate it! > > Reviewed-by: Aaron Conole > > -Aaron ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [RFC PATCH v2] netdev-dpdk: Arbitrary 'dpdk' port naming
Ignore this - I didn’t number the series correctly. Will send a corrected update in a moment. Ciara > -Original Message- > From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Ciara Loftus > Sent: Friday, July 01, 2016 10:16 AM > To: dev@openvswitch.org > Subject: [ovs-dev] [RFC PATCH v2] netdev-dpdk: Arbitrary 'dpdk' port naming > > 'dpdk' ports no longer have naming restrictions. Now, instead > of specifying the dpdk port ID as part of the name, the PCI > address of the device must be specified via the 'dpdk-pci' > option. eg. > > ovs-vsctl add-port br0 my-port > ovs-vsctl set Interface my-port type=dpdk > ovs-vsctl set Interface my-port options:dpdk-pci=:06:00.3 > > Signed-off-by: Ciara Loftus > --- > INSTALL.DPDK.md | 12 ++--- > NEWS | 2 + > lib/netdev-dpdk.c | 142 > +- > 3 files changed, 127 insertions(+), 29 deletions(-) > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > index 28c5b90..ad2fcbf 100644 > --- a/INSTALL.DPDK.md > +++ b/INSTALL.DPDK.md > @@ -208,13 +208,13 @@ Using the DPDK with ovs-vswitchd: > > `ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev` > > - Now you can add dpdk devices. OVS expects DPDK device names to start > with > - "dpdk" and end with a portid. vswitchd should print (in the log file) the > - number of dpdk devices found. > + Now you can add dpdk devices. The PCI address of the device needs to be > + set using the 'dpdk-pci' option. vswitchd should print (in the log file) > + the number and PCI addresses of dpdk devices found during initialisation. 
> > ``` > - ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk > - ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk > + ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk > options:dpdk-pci=:06:00.0 > + ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk > options:dpdk-pci=:06:00.1 > ``` > > Once first DPDK port is added to vswitchd, it creates a Polling thread and > @@ -304,7 +304,7 @@ Using the DPDK with ovs-vswitchd: > It is also possible to detach a port from ovs, the user has to remove the > port using the del-port command, then it can be detached using: > > - `ovs-appctl netdev-dpdk/port-detach dpdk0` > + `ovs-appctl netdev-dpdk/port-detach :01:00.0` > > This feature is not supported with VFIO and could not work with some > NICs, > please refer to the [DPDK Port Hotplug Framework] in order to get more > diff --git a/NEWS b/NEWS > index a1146b0..db702b7 100644 > --- a/NEWS > +++ b/NEWS > @@ -49,6 +49,8 @@ Post-v2.5.0 > node that device memory is located on if > CONFIG_RTE_LIBRTE_VHOST_NUMA > is enabled in DPDK. > * Port Hotplug is now supported. > + * DPDK physical ports can now have arbitrary names. The PCI address of > + the device must be set using the 'dpdk-pci' option. > - ovs-benchmark: This utility has been removed due to lack of use and > bitrot. 
> - ovs-appctl: > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 857339a..8e69f3a 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -144,6 +144,10 @@ static char *vhost_sock_dir = NULL; /* Location of > vhost-user sockets */ > > #define VHOST_ENQ_RETRY_NUM 8 > > +static uint8_t nb_ports; /* Number of DPDK ports initialised */ > +struct rte_pci_addr pci_devs[RTE_MAX_ETHPORTS]; /* PCI info of initialised > DPDK > + devices */ > + > static const struct rte_eth_conf port_conf = { > .rxmode = { > .mq_mode = ETH_MQ_RX_RSS, > @@ -757,7 +761,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned > int port_no, > /* If the 'sid' is negative, it means that the kernel fails > * to obtain the pci numa info. In that situation, always > * use 'SOCKET0'. */ > -if (type == DPDK_DEV_ETH) { > +if (type == DPDK_DEV_ETH && (dev->port_id != -1)) { > sid = rte_eth_dev_socket_id(port_no); > } else { > sid = rte_lcore_to_socket_id(rte_get_master_lcore()); > @@ -795,9 +799,11 @@ netdev_dpdk_init(struct netdev *netdev, unsigned > int port_no, > > if (type == DPDK_DEV_ETH) { > netdev_dpdk_alloc_txq(dev, NR_QUEUE); > -err = dpdk_eth_dev_init(dev); > -if (err) { > -goto unlock; > +if (dev->port_id != -1) { > +err = dpdk_eth_dev_init(dev); > +if (err) { > +goto unlock; > +} > } > } else { > netdev_dpdk_alloc_txq(dev, OVS_VHOST_MAX_QUEUE_NUM); > @@ -909,21 +915,14 @@ netdev_dpdk_vhost_user_construct(struct netdev > *netdev) > static int > netdev_dpdk_construct(struct netdev *netdev) > { > -unsigned int port_no; > int err; > > if (rte_eal_init_ret) { > return rte_eal_init_ret; > } > > -/* Names always start with "dpdk" */ > -er
Re: [ovs-dev] [PATCH RFC 0/1] netdev-dpdk: Arbitrary 'dpdk' port naming
> > Hello Ciara, > I like too much the idea of arbitrary names, it has always been a problem > realizing the correct id of a port. > As already mentioned by Jan, what do you think about the possibility of > integrate this with hotplug capabilities? I already sent a patch [1] to > support > hotplug in ovs-dpdk. > > [1] https://patchwork.ozlabs.org/patch/626897/ > Mauricio V, > > On Thu, Jun 16, 2016 at 1:54 PM, Jan Scheurich > wrote: > I very much support the proposal to make configuration of "physical" DPDK > ports more explicit and flexible. Both the ability to specify the port by its > PCI > address as well as the ability to choose arbitrary port names are highly > welcome. > > +1 for idea and implementation. > > Can we combine this proposal with the ability to automatically hot-plug PCI > devices that were not yet bound to DPDK-compatible drivers (e.g. igb_uio, > vfio_pci etc) at start of ovs-vswitchd? > > BR, Jan Hi Jan & Mauricio, Thanks for the feedback. I will look to include the hotplug support in the v2. Thanks, Ciara > > > -Original Message- > > From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Ciara > Loftus > > Sent: Tuesday, 14 June, 2016 16:08 > > To: dev@openvswitch.org > > Subject: [ovs-dev] [PATCH RFC 0/1] netdev-dpdk: Arbitrary 'dpdk' port > > naming > > > > This RFC patch addresses the restrictions placed on dpdk port names. > > > > Currently dpdk ports must be named dpdkX where X is the dpdk port-id. > > This patch removes this restriction and introduces the requirement to > specify > > the pci address of the device in the other_config db. eg. > > > > ovs-vsctl add-port br0 my-port -- set Interface my-port type=dpdk > > options:dpdk-pci=:06:00.0 > > > > This patch is a work in progress and minimal testing has been performed, > > although a basic setup switching between two dpdk ports has been verified > > to work. > > > > I'd welcome feedback on both the idea and the approach. 
> > > > INSTALL.DPDK.md | 10 > > NEWS | 2 ++ > > lib/netdev-dpdk.c | 71 > > +-- > > 3 files changed, 66 insertions(+), 17 deletions(-) > > > > -- > > 2.4.3 > > > > ___ > > dev mailing list > > dev@openvswitch.org > > http://openvswitch.org/mailman/listinfo/dev > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] acinclude: check for numa library
> > Numa library is needed for NUMA aware vHost User functionality. > In case of a missing numa package, the OVS DPDK configuration fails with > "error: Could not find DPDK libraries in /TARGET/lib" though > the DPDK library is installed. > > This patch fixes this inappropriate error by checking for presence of > numa library and outputting an appropriate error message "error: unable to > find libnuma, install the dependency package" in case of missing package. > > Signed-off-by: Bhanuprakash Bodireddy > > --- > acinclude.m4 | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/acinclude.m4 b/acinclude.m4 > index 3978980..fddd913 100644 > --- a/acinclude.m4 > +++ b/acinclude.m4 > @@ -209,6 +209,8 @@ AC_DEFUN([OVS_CHECK_DPDK], [ >[AC_DEFINE([VHOST_CUSE], [1], [DPDK vhost-cuse support enabled, > vhost-user disabled.]) > DPDK_EXTRA_LIB="-lfuse"]) > > +AC_SEARCH_LIBS([get_mempolicy],[numa],[],[AC_MSG_ERROR([unable > to find libnuma, install the dependency package])]) > + > # On some systems we have to add -ldl to link with dpdk > # > # This code, at first, tries to link without -ldl (""), > -- > 2.4.11 Acked-by: Ciara Loftus ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] netdev-dpdk: NUMA Aware vHost User
> Thanks for the patch! > I'm not sure how to best handle the libnuma dependency. Question: > Is it still useful to move the device to a PMD thread on the appropriate > numa socket, even if DPDK is compiled without > CONFIG_RTE_LIBRTE_VHOST_NUMA? If it's useful, I'm fine with the > approach followed by this patch. Otherwise I think we should handle > the -lnuma inclusion like -lfuse for CUSE and introduce two ifdefs (one > on #include and one on new_device()). > Small comments inline, otherwise this looks good to me. > Thanks, > Daniele Hi Daniele, Thanks for the feedback. I'll address your comments in the next revision. Regarding your question above - as it is now, the PMD will only be relocated if the config option is enabled. If the config option is not enabled it behaves as before, so the case you mention above will not occur. Thanks, Ciara > > 2016-05-24 6:15 GMT-07:00 Ciara Loftus : > This commit allows for vHost User memory from QEMU, DPDK and OVS, as > well as the servicing PMD, to all come from the same socket. > > The socket id of a vhost-user port used to be set to that of the master > lcore. Now it is possible to update the socket id if it is detected > (during VM boot) that the vhost device memory is not on this node. If > this is the case, a new mempool is created from the new node, and the > PMD thread currently servicing the port will no longer, in favour of a > thread from the new node (if enabled in the pmd-cpu-mask). > > To avail of this functionality, one must enable the > CONFIG_RTE_LIBRTE_VHOST_NUMA DPDK configuration option. 
> > Signed-off-by: Ciara Loftus > --- > .travis.yml | 3 +++ > INSTALL.DPDK.md | 8 ++-- > NEWS | 3 +++ > acinclude.m4 | 2 +- > lib/netdev-dpdk.c | 37 ++-- > - > rhel/openvswitch-fedora.spec.in | 1 + > 6 files changed, 48 insertions(+), 6 deletions(-) > > diff --git a/.travis.yml b/.travis.yml > index ee2cf21..faba325 100644 > --- a/.travis.yml > +++ b/.travis.yml > @@ -11,10 +11,13 @@ addons: > packages: > - bc > - gcc-multilib > + - libnuma1 > > I think libnuma-dev depends on libnuma1, so the above line might not be > necessary. > > + - libnuma-dev > - libssl-dev > - llvm-dev > - libjemalloc1 > - libjemalloc-dev > + - numactl > > Do we need the numactl package? > > > before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > index 93f92e4..bbe0234 100644 > --- a/INSTALL.DPDK.md > +++ b/INSTALL.DPDK.md > @@ -16,7 +16,7 @@ OVS needs a system with 1GB hugepages support. > Building and Installing: > > > -Required: DPDK 16.04 > +Required: DPDK 16.04, libnuma > Optional (if building with vhost-cuse): `fuse`, `fuse-devel` (`libfuse-dev` > on Debian/Ubuntu) > > @@ -443,7 +443,11 @@ Performance Tuning: > > It is good practice to ensure that threads that are in the datapath > are > pinned to cores in the same NUMA area. e.g. pmd threads and QEMU > vCPUs > - responsible for forwarding. > + responsible for forwarding. If DPDK is built with > + CONFIG_RTE_LIBRTE_VHOST_NUMA=y, vHost User ports automatically > + detect the NUMA socket of the QEMU vCPUs and will be serviced by a > PMD > + from the same node provided a core on this node is enabled in the > + pmd-cpu-mask. > > 9. Rx Mergeable buffers > > diff --git a/NEWS b/NEWS > index 4e81cad..24ca39f 100644 > --- a/NEWS > +++ b/NEWS > @@ -32,6 +32,9 @@ Post-v2.5.0 > * DB entries have been added for many of the DPDK EAL command line > arguments. Additional arguments can be passed via the dpdk-extra > entry. 
> + * PMD threads servicing vHost User ports can now come from the NUMA > + node that device memory is located on if > CONFIG_RTE_LIBRTE_VHOST_NUMA > + is enabled in DPDK. > - ovs-benchmark: This utility has been removed due to lack of use and > bitrot. > - ovs-appctl: > diff --git a/acinclude.m4 b/acinclude.m4 > index f3de855..99ddf04 100644 > --- a/acinclude.m4 > +++ b/acinclude.m4 > @@ -218,7 +218,7 @@ AC_DEFUN([OVS_CHECK_DPDK], [ > DPDKLIB_FOUND=false > save_LIBS=$LIBS > for extras in "" "-ldl"; do > - LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB" > + LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB -lnuma" > AC_LINK_IFELSE( > [AC_LANG_PROGRAM([#include > #include ], > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 0d1b8c9..ad6c4bb 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -30,6 +30,7 @@ > #include > #include > #include > +#include > > #include "dirs.h" > #include "dp-packet.h" > @@ -378,6 +379,9 @@ struct netdev_dpdk { > * netdev_dpdk*_reconfigure() is called */ > int r
Re: [ovs-dev] [PATCH] netdev-dpdk: NUMA Aware vHost User
> > > >This commit allows for vHost User memory from QEMU, DPDK and OVS, as > >well as the servicing PMD, to all come from the same socket. > > > >The socket id of a vhost-user port used to be set to that of the master > >lcore. > >Now it is possible to update the socket id if it is detected (during VM boot) > >that the vhost device memory is not on this node. If this is the case, a new > >mempool is created from the new node, and the PMD thread currently > >servicing the port will no longer, in favour of a thread from the new node > >(if > >enabled in the pmd-cpu-mask). > > > >To avail of this functionality, one must enable the > >CONFIG_RTE_LIBRTE_VHOST_NUMA DPDK configuration option. > > > >Signed-off-by: Ciara Loftus > >--- > > .travis.yml | 3 +++ > > INSTALL.DPDK.md | 8 ++-- > > NEWS| 3 +++ > > acinclude.m4| 2 +- > > lib/netdev-dpdk.c | 37 ++- > -- > > rhel/openvswitch-fedora.spec.in | 1 + > > 6 files changed, 48 insertions(+), 6 deletions(-) > > > >diff --git a/.travis.yml b/.travis.yml > >index ee2cf21..faba325 100644 > >--- a/.travis.yml > >+++ b/.travis.yml > >@@ -11,10 +11,13 @@ addons: > > packages: > > - bc > > - gcc-multilib > >+ - libnuma1 > >+ - libnuma-dev > > - libssl-dev > > - llvm-dev > > - libjemalloc1 > > - libjemalloc-dev > >+ - numactl > > > > before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh > > > >diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md index 93f92e4..bbe0234 > >100644 > >--- a/INSTALL.DPDK.md > >+++ b/INSTALL.DPDK.md > >@@ -16,7 +16,7 @@ OVS needs a system with 1GB hugepages support. > > Building and Installing: > > > > > >-Required: DPDK 16.04 > >+Required: DPDK 16.04, libnuma > > The change above makes libnuma mandatory to build OVS with DPDK > datapath. The config option CONFIG_RTE_LIBRTE_VHOST_NUMA is disabled > by default in DPDK-16.04 and hence steps to enable this option and build > DPDK may have to be captured in "Configure build & Install DPDK" section of > the install guide. Hi Bhanu, Thanks for your feedback. 
I chose to omit this from the "configure build & install DPDK" section since this configuration option is optional and I suspect the average user would not be looking for this type of functionality. Those who are looking to fine-tune a dual-node vHost User set-up can find the info in the performance tuning section. > > > Optional (if building with vhost-cuse): `fuse`, `fuse-devel` (`libfuse-dev` > > on > >Debian/Ubuntu) > > > >@@ -443,7 +443,11 @@ Performance Tuning: > > > > It is good practice to ensure that threads that are in the datapath are > > pinned to cores in the same NUMA area. e.g. pmd threads and > QEMU > >vCPUs > >-responsible for forwarding. > >+responsible for forwarding. If DPDK is built with > >+CONFIG_RTE_LIBRTE_VHOST_NUMA=y, vHost User ports > >automatically > >+detect the NUMA socket of the QEMU vCPUs and will be serviced by > a > >PMD > >+from the same node provided a core on this node is enabled in the > >+pmd-cpu-mask. > > > > 9. Rx Mergeable buffers > > > >diff --git a/NEWS b/NEWS > >index 4e81cad..24ca39f 100644 > >--- a/NEWS > >+++ b/NEWS > >@@ -32,6 +32,9 @@ Post-v2.5.0 > > * DB entries have been added for many of the DPDK EAL command line > >arguments. Additional arguments can be passed via the dpdk-extra > >entry. > >+ * PMD threads servicing vHost User ports can now come from the > NUMA > >+ node that device memory is located on if > >CONFIG_RTE_LIBRTE_VHOST_NUMA > >+ is enabled in DPDK. > >- ovs-benchmark: This utility has been removed due to lack of use and > > bitrot. 
> >- ovs-appctl: > >diff --git a/acinclude.m4 b/acinclude.m4 index f3de855..99ddf04 100644 > >--- a/acinclude.m4 > >+++ b/acinclude.m4 > >@@ -218,7 +218,7 @@ AC_DEFUN([OVS_CHECK_DPDK], [ > > DPDKLIB_FOUND=false > > save_LIBS=$LIBS > > for extras in "" "-ldl"; do > >-LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB" > >+LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB -lnuma" > > The above change makes libnuma mandatory for configuring OVS using DPDK > datapath while ' CONFIG_RTE_LIBRTE_VHOST_NUMA' is disabled by default. > IMHO, can we check if LIBRTE_VHOST_NUMA is enabled(from rte_config.h) > and append "lnuma" only when it is true. This is inline with how we handle > VHOST CUSE case. With this patch libnuma is a requirement whether or not CONFIG_RTE_LIBRTE_VHOST_NUMA is enabled since we are using the get_mempolicy() function in netdev-dpdk. The alternative would be to, as you suggested, detect NUMA=y from rte_config.h and introduce #ifdef VHOST_NUMA around the code where get_mempolicy() is used. Personally I would be more in favour of always linking with libnuma but willing to introduce the change if the consensu
Re: [ovs-dev] [PATCH v3 3/3] netdev-dpdk: Add vhost-user 'get_features' & 'get_status' functions
> > > > Implementations for the netdev functions 'get_features' and > > 'get_status' are now available for vhost-user thanks to the addition of > > the vHost PMD. > > > > Signed-off-by: Ciara Loftus > > --- > > lib/netdev-dpdk.c | 23 +-- > > 1 file changed, 13 insertions(+), 10 deletions(-) > > > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index 814ef83..fce1655 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -2301,15 +2301,18 @@ netdev_dpdk_get_status(const struct netdev > > *netdev, struct smap *args) > > smap_add_format(args, "max_rx_queues", "%u", > > dev_info.max_rx_queues); > > smap_add_format(args, "max_tx_queues", "%u", > > dev_info.max_tx_queues); > > smap_add_format(args, "max_mac_addrs", "%u", > > dev_info.max_mac_addrs); > > -smap_add_format(args, "max_hash_mac_addrs", "%u", > > dev_info.max_hash_mac_addrs); > > -smap_add_format(args, "max_vfs", "%u", dev_info.max_vfs); > > -smap_add_format(args, "max_vmdq_pools", "%u", > > dev_info.max_vmdq_pools); > > > > -if (dev_info.pci_dev) { > > -smap_add_format(args, "pci-vendor_id", "0x%u", > > -dev_info.pci_dev->id.vendor_id); > > -smap_add_format(args, "pci-device_id", "0x%x", > > -dev_info.pci_dev->id.device_id); > > +if (dev->type == DPDK_DEV_ETH) { > > +smap_add_format(args, "max_hash_mac_addrs", "%u", > > +dev_info.max_hash_mac_addrs); > > +smap_add_format(args, "max_vfs", "%u", dev_info.max_vfs); > > +smap_add_format(args, "max_vmdq_pools", "%u", > > dev_info.max_vmdq_pools); > > +if (dev_info.pci_dev) { > > +smap_add_format(args, "pci-vendor_id", "0x%u", > > +dev_info.pci_dev->id.vendor_id); > > +smap_add_format(args, "pci-device_id", "0x%x", > > +dev_info.pci_dev->id.device_id); > > +} > > } > > > > return 0; > > @@ -3431,8 +3434,8 @@ static const struct netdev_class OVS_UNUSED > > dpdk_vhost_user_class = > > netdev_dpdk_vhost_user_send, > > netdev_dpdk_get_carrier, > > netdev_dpdk_get_stats, > > -NULL, > > -NULL, > > +netdev_dpdk_get_features, > > 
+netdev_dpdk_get_status, > > Maybe a comment for 1/3 but just thought of it while reviewing this: > do you need to call check_link_status() in netdev_dpdk_get_status() now > that it's not on a timer anymore? or is it guaranteed to be called for > all interfaces prior to netdev_dpdk_get_status(). Do you mean in netdev_dpdk_get_carrier() ? Perhaps the call isn't needed anymore... but there may be one corner case. Not sure if it's possible but if the link status changed interrupt and netdev_dpdk_get_carrier() are both called, it is whoever takes the dpdk_mutex first that will continue first. If netdev_dpdk_get_carrier() gets the mutex, it will only get up-to-date link info if the check_link_status() is called. If the call is not there, get_carrier() will return the old link status, free the mutex, and the interrupt will continue on and update the status to the new one immediately after. So I think it should be kept, but I don't have strong feelings about it either way. Thanks, Ciara > > > > netdev_dpdk_vhost_user_rxq_recv); > > > > void > > -- > > 2.4.3 > > > > ___ > > dev mailing list > > dev@openvswitch.org > > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH v3 2/3] netdev-dpdk: Add vHost User PMD
> > > > DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' > > ports > > to be controlled by the librte_ether API, like physical 'dpdk' ports. > > The commit integrates this functionality into OVS, and refactors some > > of the existing vhost code such that it is vhost-cuse specific. > > Similarly, there is now some overlap between dpdk and vhost-user port > > code. > > > > Signed-off-by: Ciara Loftus > > Hi, few minor comments below. I didn't review the cuse specific code this > time around. Thanks Kevin for the feedback, my responses are inline. Ciara > > Kevin. > > > > --- > > INSTALL.DPDK.md | 12 ++ > > NEWS | 2 + > > lib/netdev-dpdk.c | 628 +- > -- > > -- > > 3 files changed, 396 insertions(+), 246 deletions(-) > > > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > > index 93f92e4..db7153a 100644 > > --- a/INSTALL.DPDK.md > > +++ b/INSTALL.DPDK.md > > @@ -990,6 +990,18 @@ Restrictions: > > increased to the desired number of queues. Both DPDK and OVS > > must be > > recompiled for this change to take effect. > > > > + DPDK 'eth' type ports: > > + - dpdk, dpdkr and dpdkvhostuser ports are 'eth' type ports in the > > context of > > +DPDK as they are all managed by the rte_ether API. This means > > that they > > +adhere to the DPDK configuration option CONFIG_RTE_MAX_ETHPORTS > > which by > > +default is set to 32. This means by default the combined total > > number of > > +dpdk, dpdkr and dpdkvhostuser ports allowable in OVS with DPDK > > is 32. This > > +value can be changed if desired by modifying the configuration > > file in > > +DPDK, or by overriding the default value on the command line > > when building > > +DPDK. eg. > > + > > +`make install CONFIG_RTE_MAX_ETHPORTS=64` > > + > > Bug Reporting: > > -- > > > > diff --git a/NEWS b/NEWS > > index 4e81cad..841314b 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -32,6 +32,8 @@ Post-v2.5.0 > > * DB entries have been added for many of the DPDK EAL command > > line > > arguments. 
Additional arguments can be passed via the dpdk- > > extra > > entry. > > + * vHost PMD integration brings vhost-user ports under control > > of the > > + rte_ether DPDK API. > > - ovs-benchmark: This utility has been removed due to lack of use > > and > > bitrot. > > - ovs-appctl: > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index 89d783a..814ef83 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -55,6 +55,7 @@ > > #include "unixctl.h" > > > > #include "rte_config.h" > > +#include "rte_eth_vhost.h" > > #include "rte_mbuf.h" > > #include "rte_meter.h" > > #include "rte_virtio_net.h" > > @@ -139,6 +140,11 @@ static char *cuse_dev_name = NULL;/* > > Character device cuse_dev_name. */ > > #endif > > static char *vhost_sock_dir = NULL; /* Location of vhost-user > > sockets */ > > > > +/* Array that tracks the used & unused vHost user driver IDs */ > > +static unsigned int vhost_user_drv_ids[RTE_MAX_ETHPORTS]; > > I think you can replace this array with a counter. You don't need a > unique id - just that you are < MAX. I considered at first using a counter, but what if the counter reaches the MAX but we still have space for most vHost ports? eg. We add RTE_MAX_ETHPORTS vHost ports, delete all the ports, then try to add one again but can't because the counter is at max. Even if we decrement the counter on delete this still doesn't solve the problem because the port we delete won't necessarily be the last one we've added. > > > +/* Maximum string length allowed to provide to rte_eth_attach > > function */ > > +#define DEVARGS_MAX (RTE_ETH_NAME_MAX_LEN + PATH_MAX + 18) > > + > > /* > > * Maximum amount of time in micro seconds to try and enqueue to > > vhost. 
> > */ > > @@ -172,7 +178,8 @@ enum { DRAIN_TSC = 20ULL }; > > > > enum dpdk_dev_type { > > DPDK_DEV_ETH = 0, > > -DPDK_DEV_VHOST = 1, > > +DPDK_DEV_VHOST_USER = 1, > > +DPDK_DEV_VHOST_CUSE = 2, > > }; > > > > static int rte_eal_init_ret = ENODEV; > > @@ -358,12 +365,22 @@ struct netdev_dpdk { > > int real_n_rxq; > > bool txq_needs_locking; > > > > -/* virtio-net structure for vhost device */ > > +/* Spinlock for vhost cuse transmission. Other DPDK devices use > > spinlocks > > + * in dpdk_tx_queue */ > > +rte_spinlock_t vhost_cuse_tx_lock; > > + > > +/* virtio-net structure for vhost cuse device */ > > OVSRCU_TYPE(struct virtio_net *) virtio_dev; > > > > +/* Number of virtqueue pairs reported by the guest */ > > +uint32_t vhost_qp_nb; > > + > > /* Identifier used to distinguish vhost devices from each other > > */ > > char vhost_id[PATH_MAX]; > > > > +/* ID of vhost user port given to the PMD driver */ > > +unsigned int vhost_pmd_id; > > + > > This could be removed if you just use a counter as p
Re: [ovs-dev] [ovs-dev,v2,2/3] netdev-dpdk: Add vHost User PMD
> On 10.05.2016 12:25, Ciara Loftus wrote: > > DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' ports > > to be controlled by the librte_ether API, like physical 'dpdk' ports. > > The commit integrates this functionality into OVS, and refactors some > > of the existing vhost code such that it is vhost-cuse specific. > > Similarly, there is now some overlap between dpdk and vhost-user port > > code. > > > > Signed-off-by: Ciara Loftus > > --- > > INSTALL.DPDK.md | 12 ++ > > NEWS | 2 + > > lib/netdev-dpdk.c | 493 ++ > > > 3 files changed, 248 insertions(+), 259 deletions(-) > > > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > > index 93f92e4..db7153a 100644 > > --- a/INSTALL.DPDK.md > > +++ b/INSTALL.DPDK.md > > @@ -990,6 +990,18 @@ Restrictions: > > increased to the desired number of queues. Both DPDK and OVS must > be > > recompiled for this change to take effect. > > > > + DPDK 'eth' type ports: > > + - dpdk, dpdkr and dpdkvhostuser ports are 'eth' type ports in the context > of > > +DPDK as they are all managed by the rte_ether API. This means that > they > > +adhere to the DPDK configuration option CONFIG_RTE_MAX_ETHPORTS > which by > > +default is set to 32. This means by default the combined total number > > of > > +dpdk, dpdkr and dpdkvhostuser ports allowable in OVS with DPDK is 32. > This > > +value can be changed if desired by modifying the configuration file in > > +DPDK, or by overriding the default value on the command line when > building > > +DPDK. eg. > > + > > +`make install CONFIG_RTE_MAX_ETHPORTS=64` > > + > > Bug Reporting: > > -- > > > > diff --git a/NEWS b/NEWS > > index 4e81cad..841314b 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -32,6 +32,8 @@ Post-v2.5.0 > > * DB entries have been added for many of the DPDK EAL command line > > arguments. Additional arguments can be passed via the dpdk-extra > > entry. > > + * vHost PMD integration brings vhost-user ports under control of the > > + rte_ether DPDK API. 
> > - ovs-benchmark: This utility has been removed due to lack of use and > > bitrot. > > - ovs-appctl: > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index 89d783a..0e5b141 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -55,6 +55,7 @@ > > #include "unixctl.h" > > > > #include "rte_config.h" > > +#include "rte_eth_vhost.h" > > #include "rte_mbuf.h" > > #include "rte_meter.h" > > #include "rte_virtio_net.h" > > @@ -139,6 +140,11 @@ static char *cuse_dev_name = NULL;/* > Character device cuse_dev_name. */ > > #endif > > static char *vhost_sock_dir = NULL; /* Location of vhost-user sockets */ > > > > +/* Array that tracks the used & unused vHost user driver IDs */ > > +static unsigned int vhost_user_drv_ids[RTE_MAX_ETHPORTS]; > > +/* Maximum string length allowed to provide to rte_eth_attach function > */ > > +#define DEVARGS_MAX (RTE_ETH_NAME_MAX_LEN + PATH_MAX + 18) > > + > > /* > > * Maximum amount of time in micro seconds to try and enqueue to vhost. > > */ > > @@ -172,7 +178,8 @@ enum { DRAIN_TSC = 20ULL }; > > > > enum dpdk_dev_type { > > DPDK_DEV_ETH = 0, > > -DPDK_DEV_VHOST = 1, > > +DPDK_DEV_VHOST_USER = 1, > > +DPDK_DEV_VHOST_CUSE = 2, > > }; > > > > static int rte_eal_init_ret = ENODEV; > > @@ -304,8 +311,6 @@ struct dpdk_tx_queue { > > * from concurrent access. It is used > > only > > * if the queue is shared among > > different > > * pmd threads (see > > 'txq_needs_locking'). */ > > -int map; /* Mapping of configured vhost-user > > queues > > -* to enabled by guest. */ > > uint64_t tsc; > > struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN]; > > }; > > @@ -358,12 +363,22 @@ struct netdev_dpdk { > > int real_n_rxq; > > bool txq_needs_locking; > > > > -/* virtio-net structure for vhost device */ > > +/* Spinlock for vhost cuse transmission. 
Other DPDK devices use > spinlocks > > + * in dpdk_tx_queue */ > > +rte_spinlock_t vhost_cuse_tx_lock; > > + > > +/* virtio-net structure for vhost cuse device */ > > OVSRCU_TYPE(struct virtio_net *) virtio_dev; > > > > +/* Number of virtqueue pairs reported by the guest */ > > +uint32_t vhost_qp_nb; > > + > > /* Identifier used to distinguish vhost devices from each other */ > > char vhost_id[PATH_MAX]; > > > > +/* ID of vhost user port given to the PMD driver */ > > +unsigned int vhost_pmd_id; > > + > > /* In dpdk_list. */ > > struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex); > > > > @@ -381,16 +396,20 @@ struct netdev_rxq_dpdk { > > static bool dpdk_thread_is_pmd(void); > > > > static int netdev_dpdk_const
Re: [ovs-dev] [PATCH] netdev-dpdk: Add vHost User PMD
> > On 10.05.2016 11:31, Ilya Maximets wrote: > > On 03.05.2016 14:28, ciara.loftus at intel.com (Loftus, Ciara) wrote: > >>> This patch seems to remove a lot of txq remapping functions (like > >>> netdev_dpdk_remap_txqs). How does it handle the case of a > disabled txq in > >>> the guest kernel? > >> There is a difference in the amount of information we can get about > vrings > >> in OVS now. With the PMD, we no longer have direct access to the > virtio_net > >> structure. We used to use virtio_net->virt_qp_nb to determine the > number of > >> vrings (enabled and disabled) in the guest kernel, and we could map > disabled > >> onto enabled accordingly. Now with the PMD, we only get vring > information as > >> their state changes. eg. VM with 2 vrings enabled -> we assume there are > only > >> 2 vrings, even though there may be many more that are disabled. We > don't need > >> to map because we aren't aware of the disabled queues. > > > > virtio protocol still allows to disable random queue in guest. This patch > > will > > work only with linux kernel virtio driver on guest side and just because > > linux > > kernel driver always enables/disables queues sequentially. For example, > you may > > write your own application with virtio-user with 2 rx/tx queues in guest and > > disable rx queue #0. This scenario will lead to broken connection while > > queue #1 still enabled. > > > > Best regards, Ilya Maximets. > > Hi Ilya, Apologies I didn't see your mail before I sent a v2 of the patch. Thanks for the information. My testing involved the kernel driver and DPDK driver in the guest so it did not expose this type of issue. With the PMD we now receive the following data struct during vring_state_changed_callback: struct rte_eth_vhost_queue_event { uint16_t queue_id; bool rx; bool enable; }; Since we have queue_id information we should be able to correctly handle the case you mentioned above. I will look to implement a solution to this in the v3.
Thanks, Ciara ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] netdev-dpdk: Add vHost User PMD
> On 21/04/2016 13:20, Ciara Loftus wrote: > > DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' ports > > to be controlled by the librte_ether API, like physical 'dpdk' ports. > > The commit integrates this functionality into OVS, and refactors some > > of the existing vhost code such that it is vhost-cuse specific. > > Similarly, there is now some overlap between dpdk and vhost-user port > > code. > > > > Signed-off-by: Ciara Loftus > > --- > > INSTALL.DPDK.md | 12 ++ > > NEWS | 2 + > > lib/netdev-dpdk.c | 515 +- > > > Hi Ciara, there's a lot of churn in this file. It might be worth > considering to see if it could be split through a few commits commits to > help reviewers. e.g. new features like adding get_features, get_status > for vhost could be a separate patch at least. I've split into 3: - remove watchdog - add pmd - add get_stats & get_features Couldn't quite find a way to split it up more. > > > 3 files changed, 254 insertions(+), 275 deletions(-) > > mode change 100644 => 100755 lib/netdev-dpdk.c > > file permission change. Woops. Fixed in v2. > > > > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > > index 7f76df8..5006812 100644 > > --- a/INSTALL.DPDK.md > > +++ b/INSTALL.DPDK.md > > @@ -945,6 +945,18 @@ Restrictions: > > increased to the desired number of queues. Both DPDK and OVS must > be > > recompiled for this change to take effect. > > > > + DPDK 'eth' type ports: > > + - dpdk, dpdkr and dpdkvhostuser ports are 'eth' type ports in the context > of > > +DPDK as they are all managed by the rte_ether API. This means that > they > > +adhere to the DPDK configuration option CONFIG_RTE_MAX_ETHPORTS > which by > > +default is set to 32. This means by default the combined total number > > of > > +dpdk, dpdkr and dpdkvhostuser ports allowable in OVS with DPDK is 32. > This > > +value can be changed if desired by modifying the configuration file in > > +DPDK, or by overriding the default value on the command line when > building > > +DPDK. 
eg. > > + > > +`make install CONFIG_RTE_MAX_ETHPORTS=64` > > format is not registering right for this in my md viewer. It's looking ok on mine. What doesn't look right? > > > + > > Bug Reporting: > > -- > > > > diff --git a/NEWS b/NEWS > > index ea7f3a1..4dc0201 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -26,6 +26,8 @@ Post-v2.5.0 > > assignment. > >* Type of log messages from PMD threads changed from INFO to DBG. > >* QoS functionality with sample egress-policer implementation. > > + * vHost PMD integration brings vhost-user ports under control of the > > + rte_ether DPDK API. > > - ovs-benchmark: This utility has been removed due to lack of use and > >bitrot. > > - ovs-appctl: > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > old mode 100644 > > new mode 100755 > > index 208c5f5..4fccd63 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -56,6 +56,7 @@ > > #include "rte_mbuf.h" > > #include "rte_meter.h" > > #include "rte_virtio_net.h" > > +#include "rte_eth_vhost.h" > > nit: generally these go in alphabetical order. Ok > > > > > VLOG_DEFINE_THIS_MODULE(dpdk); > > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > > @@ -109,6 +110,8 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / > ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF)) > > > > static char *cuse_dev_name = NULL;/* Character device > cuse_dev_name. */ > > static char *vhost_sock_dir = NULL; /* Location of vhost-user sockets */ > > +/* Array that tracks the used & unused vHost user driver IDs */ > > +static unsigned int vhost_user_drv_ids[RTE_MAX_ETHPORTS]; > > > > /* > >* Maximum amount of time in micro seconds to try and enqueue to > vhost. > > @@ -143,7 +146,8 @@ enum { DRAIN_TSC = 20ULL }; > > > > enum dpdk_dev_type { > > DPDK_DEV_ETH = 0, > > -DPDK_DEV_VHOST = 1, > > +DPDK_DEV_VHOST_USER = 1, > > +DPDK_DEV_VHOST_CUSE = 2, > > }; > > > > static int rte_eal_init_ret = ENODEV; > > @@ -275,8 +279,6 @@ struct dpdk_tx_queue { > > * from concurrent access. 
It is used > > only > > * if the queue is shared among > > different > > * pmd threads (see > > 'txq_needs_locking'). */ > > -int map; /* Mapping of configured vhost-user > > queues > > -* to enabled by guest. */ > > uint64_t tsc; > > struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN]; > > }; > > @@ -329,12 +331,22 @@ struct netdev_dpdk { > > int real_n_rxq; > > bool txq_needs_locking; > > > > -/* virtio-net structure for vhost device */ > > +/* Spinlock for vhost cuse transmission. Other DPDK devices use > spinlocks > > + * in dpdk_tx_queu
Re: [ovs-dev] [PATCH] netdev-dpdk: print default vhost-sock-dir if none provided
> Hi Ciara, > > Please add a Fixes: tag. > > Ciara Loftus writes: > > > Signed-off-by: Ciara Loftus > > --- > > lib/netdev-dpdk.c | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index b488c60..67b9e0d 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -2982,7 +2982,7 @@ dpdk_init__(const struct smap > *ovs_other_config) > > if (process_vhost_flags("cuse-dev-name", xstrdup("vhost-net"), > > PATH_MAX, ovs_other_config, &cuse_dev_name)) { > > #else > > -if (process_vhost_flags("vhost-sock-dir", xstrdup(""), > > +if (process_vhost_flags("vhost-sock-dir", xstrdup(ovs_rundir()), > > NAME_MAX, ovs_other_config, > > &sock_dir_subcomponent)) { > > struct stat s; > > Please include this (untested) hunk (or something like it): > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index b488c60..4edf18b 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -3003,8 +3003,7 @@ dpdk_init__(const struct smap *ovs_other_config) > } > free(sock_dir_subcomponent); > } else { > -vhost_sock_dir = xstrdup(ovs_rundir()); > -free(sock_dir_subcomponent); > +vhost_sock_dir = sock_dir_subcomponent; > #endif Hi Aaron, Thanks for the suggestions. I've updated the patch and sent a v2 with an update to the documentation too. Thanks, Ciara ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] netdev-dpdk: Add vHost User PMD
> > Hi Ciara, > thanks for doing this. I really think this has the potential to clean up the > netdev-dpdk code. Thanks for your feedback Daniele. Comments inline, v2 soon. > The clang thread safety analyzer reports some warnings: > > CC lib/netdev-dpdk.lo > ../lib/netdev-dpdk.c:882:1: error: mutex 'dpdk_mutex' is not held on every > path through here [-Werror,-Wthread-safety-analysis] > } > ^ > ../lib/netdev-dpdk.c:870:5: note: mutex acquired here > ovs_mutex_lock(&dpdk_mutex); > ^ > ../include/openvswitch/thread.h:60:9: note: expanded from macro > 'ovs_mutex_lock' > ovs_mutex_lock_at(mutex, OVS_SOURCE_LOCATOR) > ^ > 1 error generated. Thanks. Will be fixed in v2. > I see that this patch removes the transmission locks for the txqs. I think > those > are still needed by physical NICs. Correct - they managed to disappear with the churn in v1! My mistake. > This patch seem to remove a lot of txq remapping functions (like > netdev_dpdk_remap_txqs). How does it handle the case of a disabled txq in > the guest kernel? There is a difference in the amount of information we can get about vrings in OVS now. With the PMD, we no longer have direct access to the virtio_net structure. We used to use virto_net->virt_qp_nb to determine the number of vrings (enabled and disabled) in the guest kernel, and we could map disabled onto enabled accordingly. Now with the PMD, we only get vring information as their state changes. eg. VM with 2 vrings enabled -> we assume there are only 2 vrings, even though there may be many more that are disabled. We don't need to map because we aren't aware of the disabled queues. > I see that vhost-cuse is still handled separately. Is it possible to use the > vhost > pmd also for vhost-cuse? Otherwise we still basically have to handle It's not possible to use the PMD for vhost-cuse, just vhost-user. > differently three cases: NIC PMD, vhost user pmd, vhost cuse. Maybe it's > time to remove vhost-cuse (I understand this is a separate issue, though)? 
I guess it's as good a time as any to discuss this. Would be interested to hear opinions from the community. > I get an error when I try this: > > ovs-vsctl add-port br0 p1 -- set Interface p1 type="dpdkvhostuser" > ovs-vsctl del-port br0 p1 > ovs-vsctl add-port br0 p1 -- set Interface p1 type="dpdkvhostuser" Will be fixed in the v2, too. I was resetting vhost_pmd_id in netdev_dpdk_init when I shouldn't have been. Removing that fixes problems with add/del combinations. > > More comments inline > Thanks! > > 2016-04-21 5:20 GMT-07:00 Ciara Loftus : > DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' ports > to be controlled by the librte_ether API, like physical 'dpdk' ports. > The commit integrates this functionality into OVS, and refactors some > of the existing vhost code such that it is vhost-cuse specific. > Similarly, there is now some overlap between dpdk and vhost-user port > code. > > Signed-off-by: Ciara Loftus > --- > INSTALL.DPDK.md | 12 ++ > NEWS | 2 + > lib/netdev-dpdk.c | 515 + > - > 3 files changed, 254 insertions(+), 275 deletions(-) > mode change 100644 => 100755 lib/netdev-dpdk.c > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > index 7f76df8..5006812 100644 > --- a/INSTALL.DPDK.md > +++ b/INSTALL.DPDK.md > @@ -945,6 +945,18 @@ Restrictions: > increased to the desired number of queues. Both DPDK and OVS must be > recompiled for this change to take effect. > > + DPDK 'eth' type ports: > + - dpdk, dpdkr and dpdkvhostuser ports are 'eth' type ports in the context > of > + DPDK as they are all managed by the rte_ether API. This means that they > + adhere to the DPDK configuration option CONFIG_RTE_MAX_ETHPORTS > which by > + default is set to 32. This means by default the combined total number of > + dpdk, dpdkr and dpdkvhostuser ports allowable in OVS with DPDK is 32. 
> This > + value can be changed if desired by modifying the configuration file in > + DPDK, or by overriding the default value on the command line when > building > + DPDK. eg. > + > + `make install CONFIG_RTE_MAX_ETHPORTS=64` > + > > This seems a heavy limitation compared to the previous librte_vhost > approach. Are there any plans to increase this in DPDK upstream? Not that I'm aware of. > > Bug Reporting: > -- > > diff --git a/NEWS b/NEWS > index ea7f3a1..4dc0201 100644 > --- a/NEWS > +++ b/NEWS > @@ -26,6 +26,8 @@ Post-v2.5.0 > assignment. > * Type of log messages from PMD threads changed from INFO to DBG. > * QoS functionality with sample egress-policer implementation. > + * vHost PMD integration brings vhost-user ports under control of the > + rte_ether DPDK API. > - ovs-benchmark: This utility has been removed due to lack of use and > bitrot. > - ovs-appctl: > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.
Re: [ovs-dev] [PATCH RFC 1/1] netdev-dpdk: NUMA Aware vHost
> > Thanks for the patch, I'll put this in the use case list for > my series if I need to resend it! > > It would be nice to get the numa socket information without > linking OVS with libnuma, maybe using some DPDK api. From > a quick look I didn't find any way, but maybe you know a > better way. > > Some preliminary comments inline Thanks for the feedback. I've sent out a v2 of the patch that incorporates most of your requests. I just noticed I misspelled your name in the cover letter, apologies! Some small comments below. Thanks, Ciara > > On 04/03/2016 02:08, "dev on behalf of Ciara Loftus" > > wrote: > > >This commit allows for vHost memory from QEMU, DPDK and OVS, as well > >as the servicing PMD, to all come from the same socket. > > > >DPDK v2.2 introduces a new configuration option: > >CONFIG_RTE_LIBRTE_VHOST_NUMA. If enabled, DPDK detects the socket > >from which a vhost device's memory has been allocated by QEMU, and > >accordingly reallocates device memory managed by DPDK to that same > >socket. > > > >OVS by default sets the socket id of a vhost port to that of the > >master lcore. This commit introduces the ability to update the > >socket id of the port if it is detected (during VM boot) that the > >port memory is not on the default NUMA node. If this is the case, the > >mempool of the port is also changed to the new node, and a PMD > >thread currently servicing the port will no longer, in favour of a > >thread from the new node (if enabled in the CPU mask). 
> > > >Signed-off-by: Ciara Loftus > >--- > > INSTALL.DPDK.md | 6 +- > > acinclude.m4 | 2 +- > > lib/netdev-dpdk.c | 25 +++-- > > 3 files changed, 29 insertions(+), 4 deletions(-) > > > >diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > >index dca79bd..82e6908 100644 > >--- a/INSTALL.DPDK.md > >+++ b/INSTALL.DPDK.md > >@@ -33,6 +33,10 @@ on Debian/Ubuntu) > > > > `CONFIG_RTE_BUILD_COMBINE_LIBS=y` > > > >+ Enable NUMA-aware vHost by modifying the following in the same file: > >+ > >+ `CONFIG_RTE_LIBRTE_VHOST_NUMA=y` > >+ > > I guess we should also update install_dpdk() in ./travis/build.sh to do > this if it's required I left this out because everything will still work ok (ie. as before) without this option. It can be optionally enabled if the functionality is desired. However if we think it should be always enabled I can include this in the next revision. > > > Then run `make install` to build and install the library. > > For default install without IVSHMEM: > > > >@@ -383,7 +387,7 @@ Performance Tuning: > > > > It is good practice to ensure that threads that are in the datapath are > > pinned to cores in the same NUMA area. e.g. pmd threads and > QEMU vCPUs > >-responsible for forwarding. > >+responsible for forwarding. This is now default behavior for vHost > >ports. > > > > 9. Rx Mergeable buffers > > > >diff --git a/acinclude.m4 b/acinclude.m4 > >index 11c7787..432bdbd 100644 > >--- a/acinclude.m4 > >+++ b/acinclude.m4 > >@@ -199,7 +199,7 @@ AC_DEFUN([OVS_CHECK_DPDK], [ > > found=false > > save_LIBS=$LIBS > > for extras in "" "-ldl"; do > >-LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB" > >+LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB -lnuma" > > I guess we should also list libnuma-dev in .travis.yml and something > similar > in rhel/openvswitch-fedora.spec I updated these in the v2. 
> > > AC_LINK_IFELSE( > >[AC_LANG_PROGRAM([#include > > #include ], > >diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > >index 17b8d51..4e1ce53 100644 > >--- a/lib/netdev-dpdk.c > >+++ b/lib/netdev-dpdk.c > >@@ -29,6 +29,7 @@ > > #include > > #include > > #include > >+#include > > > > #include "dirs.h" > > #include "dp-packet.h" > >@@ -1878,6 +1879,8 @@ new_device(struct virtio_net *dev) > > { > > struct netdev_dpdk *netdev; > > bool exists = false; > >+int newnode = 0; > >+long err = 0; > > > > ovs_mutex_lock(&dpdk_mutex); > > /* Add device to the vhost port with the same name as that passed > >down. */ > >@@ -1891,6 +1894,24 @@ new_device(struct virtio_net *dev) > > } > > ovsrcu_set(&netdev->virtio_dev, dev); > > exists = true; > >+ > >+/* Get NUMA information */ > >+err = get_mempolicy(&newnode, NULL, 0, dev, MPOL_F_NODE | > >MPOL_F_ADDR); > >+if (err) { > >+VLOG_INFO("Error getting NUMA info for vHost Device > >'%s'", > >+dev->ifname); > >+newnode = netdev->socket_id; > >+} else if (newnode != netdev->socket_id) { > >+netdev->socket_id = newnode; > >+/* Change mempool to new NUMA Node */ > >+dpdk_mp_put(netdev->dpdk_mp); > >+netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, > >netdev->mtu); > >+/* Request netdev reconfiguration.
Re: [ovs-dev] OVS with DPDK Meetup notes
> > > > On Thu, Nov 26, 2015 at 05:56:08PM +, Traynor, Kevin wrote: > > > Hi All, > > > > > > Just wanted to post some summary notes on the recent OVS with DPDK > Meetup > > we > > > had after the OVS conference. Thanks to everyone for the often lively > > discussion. > > > I've collated and condensed Maryam's notes (Thank you Maryam) with > my own. > > > Corrections and additions are welcome. > > > > Thanks for having organized the event and for the good notes. > > > > > > > Usability > > > == > > > * Single binary for OVS/OVS with DPDK and static vs. dynamic linking > > > - Discussion around deployment and what the best model is. > > > - Flavio has posted a mail on this > > >http://openvswitch.org/pipermail/dev/2015-November/062599.html > > > > Let us know if you find a performance difference between static vs > > dynamic linking. We might be able to accommodate both options in > > the same spec, but it seems we should go with shared linking only > > to keep it simple for now. > > > > Yes, will do. I seem to recall from when we looked at this on a previous > project it was a few hundred kpps but it was a long time ago, so I'm not > certain how many. > > > > > > Features > > > > > > * Multiqueue vhost-user > > > - Looks really promising - will help us scale out performance to the VM. > > > > I see that vhost PMD is moving and if it gets accepted, it would > > be a nice clean up for OVS. Do you know if there is someone working > > on this already? > > I agree, it should simplify the code a lot. Ciara reviewed it and did a > quick integration to see if the api would work. The patch was churning quite > a bit, so we decided to hold off doing any more work with it for the time > being. Correct, the vHost PMD really cleans things up and removes the need for a lot of code in netdev-dpdk. The netdev_class for phy ports and vhost-user ports could be pretty much the same, except for the construct functions. > > > > > > * dpdkr/ivshmem > > > - Still useful. 
Check/Update documentation to ensure limitations are > > clear. > > > > Yeah, same thing here. > > > > Thanks, > > fbl ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] INSTALL.DPDK: Mention issue with QEMU v2.4.0 & dpdkvhostuser
> > Hi, > > Is this a bug in OVS code? In this case I think we can work on a fix, > rather > than suggesting to downgrade QEMU. > > If this is a bug in DPDK librte_vhost, is there a fix available (on > master, or > on the mailing list)? Hi Daniele, The fix is available in DPDK master: http://dpdk.org/browse/dpdk/commit/?id=2c95f4de6a7ec59c5793c64588066b6b2b8e6142 I checked, it applies cleanly on top of DPDK v2.1.0. I could mention this in my INSTALL.DPDK patch. eg. Either apply this DPDK patch or use an earlier QEMU version. What do you think? Thanks, Ciara > > Thanks > > On 29/10/2015 04:50, "Ciara Loftus" wrote: > > >Currently when using QEMU v2.4.0+, two (or more) dpdkvhostuser ports > >cannot be unbound from the kernel driver in the guest without causing > >the ovs-vswitchd process to crash. Document this limitation. > > > >Signed-off-by: Ciara Loftus > >--- > > INSTALL.DPDK.md | 10 ++ > > 1 file changed, 10 insertions(+) > > > >diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > >index 7bf110c..01963e9 100644 > >--- a/INSTALL.DPDK.md > >+++ b/INSTALL.DPDK.md > >@@ -891,6 +891,16 @@ Restrictions: > > core count of the system to be less than or equal to 64 when using > >an XL710 > > interface with DPDK. > > > >+ vHost and QEMU v2.4.0+: > >+ - For versions of QEMU v2.4.0 and later, it is currently not possible > >to > >+unbind more than one dpdkvhostuser port from the guest kernel driver > >without > >+causing the ovs-vswitchd process to crash. If this is a requirement > >for your > >+use case, it is recommended to use a version of QEMU between v2.2.0 > >and > >+v2.3.1 (inclusive). This problem will likely be resolved in Open > >vSwitch at > >+a later date, when the next release of DPDK is available and > >integrated. > >+Note: The QEMU commit ID that breaks compatibility is > >+294ce717e0f212ed0763307f3eab72b4a1bdf4d0. 
> >+ > > Bug Reporting: > > -- > > > >-- > >1.9.3 > > > >___ > >dev mailing list > >dev@openvswitch.org > >https://urldefense.proofpoint.com/v2/url?u=http- > 3A__openvswitch.org_mailma > >n_listinfo_dev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw- > YihVMNtXt-uEs&r=Sm > >B5nZacmXNq0gKCC1s_Cw5yUNjxgD4v5kJqZ2uWLlE&m=2vLCArpNUJzLbDd2 > -6RrKoZXJuvAnc > >DxVRpVb2KP4zE&s=ubo8WHxMWKfjSZtGY4G9HcmXUMMCGE7FvEk6adAs > xQ8&e= ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] dpif-netdev: move header prefetch earlier into the receive function
> > I tested the patch, but I wasn't able to reproduce your measurements. I was able to reproduce a slight performance improvement with this patch for single-flow uni-directional 64B traffic - up about 0.3Mpps for me. Thanks, Ciara > > On my test setup I noticed no difference in throughput for different packet > sizes/flow tables. > > Could you describe your setup in more details? > > I'd be happy to improve prefetching if it a simple change like the this. > > Thanks, > > Daniele > > On 17/09/2015 21:29, "Zoltan Kiss" wrote: > > >It's better to have it in the cache as soon as possible. On my test setup > >it > >meant a 0.7 Mpps increase. > > > >Signed-off-by: Zoltan Kiss > > > >diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > >index 72e5653..3312cc0 100644 > >--- a/lib/dpif-netdev.c > >+++ b/lib/dpif-netdev.c > >@@ -3229,11 +3229,6 @@ emc_processing(struct dp_netdev_pmd_thread > *pmd, > >struct dp_packet **packets, > > continue; > > } > > > >-if (i != cnt - 1) { > >-/* Prefetch next packet data */ > >-OVS_PREFETCH(dp_packet_data(packets[i+1])); > >-} > >- > > miniflow_extract(packets[i], &key.mf); > > key.len = 0; /* Not computed yet. */ > > key.hash = dpif_netdev_packet_get_rss_hash(packets[i], &key.mf); > >diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > >index e4e3d2c..c3c7ec0 100644 > >--- a/lib/netdev-dpdk.c > >+++ b/lib/netdev-dpdk.c > >@@ -1015,7 +1015,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, > >struct dp_packet **packets, > > struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq_); > > struct netdev *netdev = rx->up.netdev; > > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > >-int nb_rx; > >+int nb_rx, i; > > > > /* There is only one tx queue for this core. Do not flush other > > * queues. 
> >@@ -1033,6 +1033,9 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, > >struct dp_packet **packets, > > return EAGAIN; > > } > > > >+for (i = 0; i < nb_rx; i++) > >+OVS_PREFETCH(dp_packet_data(packets[i])); > >+ > > *c = nb_rx; > > > > return 0; > > > >___ > >dev mailing list > >dev@openvswitch.org > >https://urldefense.proofpoint.com/v2/url?u=http- > 3A__openvswitch.org_mailma > >n_listinfo_dev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw- > YihVMNtXt-uEs&r=Sm > >B5nZacmXNq0gKCC1s_Cw5yUNjxgD4v5kJqZ2uWLlE&m=3RVqArIQwG9h7SC > _5ZdxmrRZ3p0soQ > >NTcUPJwi3-ZZA&s=Q9jC4I1Pmb4XizqudZUy0cMZBgHvmlSGcZerBzZW- > Ig&e= > > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] bugfix: Fix DPDK linking when using a relative path
> > When linking with DPDK, if a relative path is used with the > '--with-dpdk' flag, then OVS will always be compiled with vHost Cuse > support, even if it is not enabled in the DPDK build. > This patch fixes this problem, and enables the correct version of > vHost regardless of whether a relative or absolute path is used. > --- > acinclude.m4 | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/acinclude.m4 b/acinclude.m4 > index b755dc4..47d9318 100644 > --- a/acinclude.m4 > +++ b/acinclude.m4 > @@ -174,9 +174,10 @@ AC_DEFUN([OVS_CHECK_DPDK], [ > DPDK_LIB_DIR=$RTE_SDK/lib > DPDK_LIB="-ldpdk" > DPDK_EXTRA_LIB="" > +RTE_SDK_FULL=`readlink -f $RTE_SDK` > > AC_COMPILE_IFELSE( > - [AC_LANG_PROGRAM([#include <$RTE_SDK/include/rte_config.h> > + [AC_LANG_PROGRAM([#include > <$RTE_SDK_FULL/include/rte_config.h> > #if !RTE_LIBRTE_VHOST_USER > #error > #endif], [])], > -- > 1.9.3 > Apologies, I forgot: Signed-off-by: Ciara Loftus Let me know if a resend is necessary. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] intra VM communication
> > Yea you are right , it is during the vswitch init it is reaching there , that > time > there was no VM running . But once I start the VMs I don't see ovs trying to > de queue at all . (Ovs just stops to dequeue after few attempts of failure) > > Is it expected behavior ? If you have the same vHost device (in your case dpdk1) attached to two VMs I expect you will run into issues. I suggest you modify your QEMU command lines to use dpdk0 & dpdk1 for VM1 and dpdk2 and dpdk3 for VM2, rather than using dpdk1 port in both. > /srikanth > > > > On Thursday, July 23, 2015, Loftus, Ciara wrote: > > > > Hi Ciara , > > When i further try to debug the issue , i could see that > > > > if (OVS_UNLIKELY(!is_vhost_running(virtio_dev))) { > > return EAGAIN; > > <<<<<<<<<<< it always returns from here >>>>>>>> > > } > > >>>>>> I believe that dequeue has to be called to get the packets from > > Guest to User space . > > nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid, > > vhost_dev->dpdk_mp->mp, > > (struct rte_mbuf **)packets, > > NETDEV_MAX_BURST); > > When you enter that section of code it usually means your vHost device has > not been brought up in a VM yet. > It's unclear which of your 4 vHost devices is failing the is_vhost_running > test, > but my guess is that it is 'dpdk2' - from your setup it appears this device > doesn't get used in a virtual machine and thus OVS will never attempt to > dequeue from that device because it is essentially NULL. > > Please see below a comment on your QEMU command lines from the > previous email. > > Thanks, > Ciara > > > > > Below are the logs for my vswitch with dpdkvhostuser ports. 
> > > > 2015-07-22T17:33:40.395Z|00020|bridge|INFO|bridge temp0: using > datapath > > ID e295aa430244 > > 2015-07-22T17:33:40.395Z|00021|connmgr|INFO|temp0: added service > > controller "punix:/var/run/openvswitch/temp0.mgmt" > > 2015-07-22T17:33:40.462Z|00022|dpif_netdev|INFO|Created 1 pmd > threads > > on numa node 0 > > 2015-07-22T17:33:40.465Z|00023|bridge|INFO|ovs-vswitchd (Open > vSwitch) > > 2.4.90 > > 2015-07-22T17:33:40.466Z|1|dpif_netdev(pmd41)|INFO|Core 0 > > processing port 'dpdk3' > > 2015-07-22T17:33:40.466Z|2|dpif_netdev(pmd41)|INFO|Core 0 > > processing port 'dpdk2' > > 2015-07-22T17:33:40.466Z|3|dpif_netdev(pmd41)|INFO|Core 0 > > processing port 'dpdk1' > > 2015-07-22T17:33:40.466Z|4|dpif_netdev(pmd41)|INFO|Core 0 > > processing port 'dpdk0' > > 2015-07-22T17:33:44.470Z|00024|memory|INFO|729380 kB peak resident > set > > size after 10.3 seconds > > 2015-07-22T17:33:44.470Z|00025|memory|INFO|handlers:13 ports:5 > > revalidators:5 rules:5 > > > > I am under a strong opinion that i have missed some configuration here . > > Please let me know . > > -Srikanth > > > > > > On Wed, Jul 22, 2015 at 11:30 AM, Srikanth Akula > > wrote: > > Hi Ciera , > > Thank you for your reply . > > > > I am assuming , we dont need to configure any flows if both the ports are in > > the ovs-bridge ( each is connected to a guest) , Please let me know if i am > > wrong . > > however, i tried to configure the flows too as per your suggestion , but > > still i > > am unable to see any packets in the host for that bridge . 
> > > > I am using Qemu 2.2.0 > > qemu-system-x86_64 --version > > QEMU emulator version 2.2.0, Copyright (c) 2003-2008 Fabrice Bellard > > > > My qemu commandline options : > > > > VM1 : > > > > /usr/bin/qemu-system-x86_64 -name Vhost1 -S -machine pc-i440fx- > > 2.2,accel=kvm,usb=off -cpu > > SandyBridge,+invpcid,+erms,+bmi2,+smep,+avx2,+b > > > mi1,+fsgsbase,+abm,+pdpe1gb,+rdrand,+f16c,+osxsave,+movbe,+dca,+pcid > > > ,+pdcm,+xtpr,+fma,+tm2,+est,+smx,+vmx,+ds_cpl,+monitor,+dtes64,+pbe, > > +tm,+ht,+ss,+acpi,+ds,+vme -m 15024 -realtime mlock=off -smp 16,so > > ckets=16,cores=1,threads=1 -uuid fed77f13-ba10-57e4-7dd8-7629e6181657 > - > > no-user-config -nodefaults -chardev > > > socket,id=charmonitor,path=/var/lib/libvirt/qemu/Vhost1.monitor,server,no > > wait -mon chardev=char > > monitor,id=monitor,mode=control -rtc base=utc -no-shutdown -boot > > strict=on -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive >
Re: [ovs-dev] intra VM communication
,accel=kvm,usb=off -cpu > SandyBridge,+invpcid,+erms,+bmi2,+smep,+avx2,+b > mi1,+fsgsbase,+abm,+pdpe1gb,+rdrand,+f16c,+osxsave,+movbe,+dca,+pcid > ,+pdcm,+xtpr,+fma,+tm2,+est,+smx,+vmx,+ds_cpl,+monitor,+dtes64,+pbe, > +tm,+ht,+ss,+acpi,+ds,+vme -m 15024 -realtime mlock=off -smp 8,soc > kets=8,cores=1,threads=1 -uuid 30bc0154-7057-a7d6-12e1-7a2d8a178d47 - > no-user-config -nodefaults -chardev > socket,id=charmonitor,path=/var/lib/libvirt/qemu/Vhost2.monitor,server,no > wait -mon chardev=charmo > nitor,id=monitor,mode=control -rtc base=utc -no-shutdown -boot strict=on - > device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive > file=/test2.img,if=none,id=drive-virtio-disk0,format=raw > -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio- > disk0,id=virtio-disk0,bootindex=1 -netdev > tap,fd=24,id=hostnet0,vhost=on,vhostfd=26 -device virtio-net- > pci,netdev=hostnet0,id=net0, > mac=52:54:00:4d:91:f5,bus=pci.0,addr=0x3 -chardev pty,id=charserial0 - > device isa-serial,chardev=charserial0,id=serial0 -vnc 127.0.0.1:1 -device > cirrus- > vga,id=video0,bus=pci.0,addr=0x2 -device intel- > hda,id=sound0,bus=pci.0,addr=0x4 -device hda-duplex,id=sound0- > codec0,bus=sound0.0,cad=0 -device virtio-balloon- > pci,id=balloon0,bus=pci.0,addr=0x6 -chardev > socket,id=char1,path=/var/run/openvswitch/dpdk1 -netdev type=vhost- > user,id=mynet1,chardev=char1,vhostforce -device virtio-net- > pci,mac=00:00:00:00:00:03,netdev=mynet1 -chardev > socket,id=char2,path=/var/run/openvswitch/dpdk3 -netdev type=vhost- > user,id=mynet2,chardev=char2,vhostforce -device virtio-net- > pci,mac=00:00:00:00:00:04,netdev=mynet2 -object memory-backend- > file,id=mem,size=2048M,mem-path=/mnt/huge/,share=on > ovs-vsctl : You are attaching the 'dpdk1' device to two VMs - I expect this is why you are experiencing problems. I assume you intended on using dpdk2? 
> > ovs-vsctl show > 3c25dda6-46c4-454c-8bdf-3832636b1f71 > Bridge "temp0" > Port "dpdk1" > Interface "dpdk1" > type: dpdkvhostuser > Port "temp0" > Interface "temp0" > type: internal > Port "dpdk2" > Interface "dpdk2" > type: dpdkvhostuser > Port "dpdk0" > Interface "dpdk0" > type: dpdkvhostuser > Port "dpdk3" > Interface "dpdk3" > type: dpdkvhostuser > ovs_version: "2.4.90" > > My vswitchd options > > ovs-vswitchd --dpdk -c 0x0FF8 -n 4 --socket-mem 1024 0 -- > unix:/var/run/openvswitch/db.sock -vconsole:emer -vsyslog:err -vfile:info -- > mlockall --no-chdir --log-file=/var/log/openvswitch/ovs-vswitchd.log -- > detach --monitor > > > ovs-ofctl dump-flows temp0 > NXST_FLOW reply (xid=0x4): > cookie=0x0, duration=871.033s, table=0, n_packets=0, n_bytes=0, > idle_age=871, in_port=ANY actions=output:3 > > I am trying in the following way. . > > [vm1] [vm2] > and the Ip address are in the same subnet on the 2 Vms .. (2.2.2.x/24) > > > Please let me know if any of the configuration is having any issues. > -Srikanth > > > On Wed, Jul 22, 2015 at 2:39 AM, Loftus, Ciara > wrote: > > > > Hello, > > > > I am trying to use vhost-user for sending traffic between VMs . I have > > configured two "dpdkvhostuser" interfaces each VM using one of them > each > > . > > > > vswitchd is running with dpdk. > > Qemu is running with the vhost interfaces > > > > Guest OS can see interfaces - Verified with the static MAC i have assigned > > for vhost interfaces. > > > > But i am not able to ping b/w these two VMs . Could somebody tell me > how > > to debug this further . > > Hi, > > To ping between the VMs first assign appropriate IP addresses, then > configure the following flows: > in_port=,actions=output: > in_port=,actions=output: > > These flows allow the request/response packets to take the necessary path > for a successful ping & you should see the stats incrementing with ovs-ofctl > dump-flows. 
> > If you've already done this and it's still not working, please ensure your > QEMU version is v2.2.0 or greater. > > Thanks, > Ciara > > > > > In the host i could see the ovs-netdev & ovs bridge i have created . > > > > Regards, > > Srikanth > > ___ > > dev mailing list > > dev@openvswitch.org > > http://openvswitch.org/mailman/listinfo/dev > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] intra VM communication
> > Hello, > > I am trying to use vhost-user for sending traffic between VMs . I have > configured two "dpdkvhostuser" interfaces each VM using one of them each > . > > vswitchd is running with dpdk. > Qemu is running with the vhost interfaces > > Guest OS can see interfaces - Verified with the static MAC i have assigned > for vhost interfaces. > > But i am not able to ping b/w these two VMs . Could somebody tell me how > to debug this further . Hi, To ping between the VMs first assign appropriate IP addresses, then configure the following flows: in_port=<VM1 port>,actions=output:<VM2 port> in_port=<VM2 port>,actions=output:<VM1 port> These flows allow the request/response packets to take the necessary path for a successful ping & you should see the stats incrementing with ovs-ofctl dump-flows. If you've already done this and it's still not working, please ensure your QEMU version is v2.2.0 or greater. Thanks, Ciara > > In the host i could see the ovs-netdev & ovs bridge i have created . > > Regards, > Srikanth > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] Fix detection of vhost_cuse in dpdk rte_config.h
> > The patch makes sense for branch-2.4 and master. Thanks! > > Acked-by: Daniele Di Proietto +1 from me too - thanks for fixing this! > > On 06/07/2015 22:41, "Mussar, Gary" wrote: > > >Fix detection of vhost_cuse in dpdk rte_config.h > > > >Dpdk allows users to create a config that includes other config files and > >then override values. > > > >Eg. > >defconfig_x86_64-native_vhost_cuse-linuxapp-gcc: > > > >CONFIG_RTE_BUILD_COMBINE_LIBS=y > >CONFIG_RTE_BUILD_SHARED_LIB=n > >CONFIG_RTE_LIBRTE_VHOST=y > >CONFIG_RTE_LIBRTE_VHOST_USER=n > > > >This allows you to have both a vhostuser and vhostcuse config in the same > >source tree without the need to replicate everything in those config files > >just to change a couple of settings. The resultant .config file has all of > >the settings from the included files with the updated settings at the end. > >The resultant rte_config.h contains multiple undefs and defines for the > >overridden settings. > > > >Eg. > >> grep RTE_LIBRTE_VHOST_USER > >>x86_64-native_vhost_cuse-linuxapp-gcc/include/rte_config.h > > > >The current mechanism to detect the RTE_LIBRTE_VHOST_USER setting > merely > >greps the rte_config.h file for the string "define RTE_LIBRTE_VHOST_USER > >1" > >rather than the final setting of RTE_LIBRTE_VHOST_USER. The following > >patch > >changes this test to detect the final setting of RTE_LIBRTE_VHOST_USER. 
> > > >Signed-off-by: Gary Mussar > >--- > > acinclude.m4 | 7 ++- > > 1 file changed, 6 insertions(+), 1 deletion(-) > > > >diff --git a/acinclude.m4 b/acinclude.m4 > >index 20391ec..ef6523a 100644 > >--- a/acinclude.m4 > >+++ b/acinclude.m4 > >@@ -221,8 +221,13 @@ AC_DEFUN([OVS_CHECK_DPDK], [ > > AC_SUBST([DPDK_vswitchd_LDFLAGS]) > > AC_DEFINE([DPDK_NETDEV], [1], [System uses the DPDK module.]) > > > >-OVS_GREP_IFELSE([$RTE_SDK/include/rte_config.h], [define > >RTE_LIBRTE_VHOST_USER 1], > >+AC_LANG_PUSH(C) > >+AC_EGREP_CPP([int vhost = 1;], [ > >+#include <$RTE_SDK/include/rte_config.h> > >+int vhost = RTE_LIBRTE_VHOST_USER; > >+], > > [], [AC_DEFINE([VHOST_CUSE], [1], [DPDK vhost-cuse > >support enabled, vhost-user disabled.])]) > >+AC_LANG_POP() > > else > > RTE_SDK= > > fi > >-- > >1.9.1 > >___ > >dev mailing list > >dev@openvswitch.org > >https://urldefense.proofpoint.com/v2/url?u=http- > 3A__openvswitch.org_mailma > >n_listinfo_dev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw- > YihVMNtXt-uEs&r=Sm > >B5nZacmXNq0gKCC1s_Cw5yUNjxgD4v5kJqZ2uWLlE&m=sQCuPKlyVW4ybSg > ERn0uwJvifgVJwH > >cwvkQoaJoJ5Pw&s=PnU6MnfB9vIg- > 7Sq71VGeuUaxwYNKD6fR2CZZTX6FFY&e= > > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 08/11] metaflow: Extend size of mf_value to 128 bytes.
> On Tue, Jun 30, 2015 at 7:56 AM, Loftus, Ciara > wrote: > >> > >> On Wed, Jun 24, 2015 at 1:17 PM, Ben Pfaff wrote: > >> > On Fri, Jun 19, 2015 at 04:13:22PM -0700, Jesse Gross wrote: > >> >> Tunnel metadata can be substantially larger than our existing fields > >> >> (up to 124 bytes in a single Geneve option) so this extends the size > >> >> of the data that we can handle with metaflow fields. This also > >> >> breaks a few tests that assume that their max size is also the > >> >> maximum that can be handled in a field. > >> >> > >> >> Signed-off-by: Jesse Gross > >> > > >> > Did you look around at all to see whether this will unreasonably blow up > >> > any data or algorithms? > >> > >> I don't believe that it should have any significant effects. > >> Generally, code does operations on the fields based on mf->n_bytes > >> (with the exception of some memset()s here and there). I don't think > >> that we really store these in a large number for any real period of > >> time. > > > > With this series of patches, in particular patch 10/11 "tunnel: Geneve TLV > handling support for OpenFlow" I've measured a significant decrease in > performance with the dpdkport type. For example, with a loopback test with > 64Byte packets I've seen a 25% decrease in throughput. > > I suspect this is in relation to the size of the new tun_metadata struct. A > quick perf analysis and I see we're spending significantly more time > initialising > packet metadata in the dp_netdev_process_rxq_port function. > > Are there any plans to address this performance degradation? > > Thanks for pointing that out. I just sent out a patch that should > hopefully avoid the problem of needing to initialize the newly > enlarged structure. I don't have a great way of doing performance > testing on it, would you mind seeing if it solves the problem you're > seeing? Hi Jesse, I've tested it and the new patch solves the problem I was seeing. Thanks for providing a fix so quickly! 
Ciara ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 08/11] metaflow: Extend size of mf_value to 128 bytes.
> > On Wed, Jun 24, 2015 at 1:17 PM, Ben Pfaff wrote: > > On Fri, Jun 19, 2015 at 04:13:22PM -0700, Jesse Gross wrote: > >> Tunnel metadata can be substantially larger than our existing fields > >> (up to 124 bytes in a single Geneve option) so this extends the size > >> of the data that we can handle with metaflow fields. This also > >> breaks a few tests that assume that their max size is also the > >> maximum that can be handled in a field. > >> > >> Signed-off-by: Jesse Gross > > > > Did you look around at all to see whether this will unreasonably blow up > > any data or algorithms? > > I don't believe that it should have any significant effects. > Generally, code does operations on the fields based on mf->n_bytes > (with the exception of some memset()s here and there). I don't think > that we really store these in a large number for any real period of > time. With this series of patches, in particular patch 10/11 "tunnel: Geneve TLV handling support for OpenFlow" I've measured a significant decrease in performance with the dpdkport type. For example, with a loopback test with 64Byte packets I've seen a 25% decrease in throughput. I suspect this is in relation to the size of the new tun_metadata struct. A quick perf analysis and I see we're spending significantly more time initialising packet metadata in the dp_netdev_process_rxq_port function. Are there any plans to address this performance degradation? Thanks, Ciara ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH v7] netdev-dpdk: add dpdk vhost-user ports
> On Thu, Jun 4, 2015 at 6:53 AM, Loftus, Ciara wrote: > > > >> On Fri, May 22, 2015 at 8:40 AM, Ciara Loftus > wrote: > >> > This patch adds support for a new port type to the userspace > >> > datapath called dpdkvhostuser. > ... > ... > >> > + " --vhost_sock_dir DIR override default directory > >> > where\n" > >> > + "vhost-user sockets are > >> > created.\n"); > >> > >> since --cuse_dev_name and --vhost_sock_dir is sub-argument under > >> --dpdk, it should have single hyphen prefix. > > > > Thanks for the review, please see suggested changes implemented in the > v8 patch. > Thanks for the updated patch. > > > I've chosen to keep the 'dpdkvhostuser' port name and instead change the > 'dpdkvhost' port to 'dpdkvhostcuse'. There tends to be confusion sometimes > around which port is in use, but with these names it will be clearer to the > user exactly which vhost they are using. > > > In future release we will deprecate vhost-cuse and there will be only > one option to setup dpdk-vhost. Therefore there is no need to > explicitly name it this way. Hi Pravin, I checked with the OpenStack team to see if there would be any impact of this name change and unfortunately there is. The OpenStack project has a hard dependency on the "dpdkvhostuser" port name. Please see: https://github.com/openstack/nova/blob/stable/kilo/nova/network/linux_net.py#L1384-1385 This code was part of the recent "Kilo" release and from what I know, it would be very unlikely to be able to change this on their stable branch. Even if this was possible, it appears it would cause some difficulty as distros would be required to package the changed code. Considering that changing the port name would effectively remove vhost-user support from OpenStack, would you be willing to keep this name at least until such a time as the OpenStack project can change the name in their codebase? Thanks, Ciara ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH v7] netdev-dpdk: add dpdk vhost-user ports
> On Fri, May 22, 2015 at 8:40 AM, Ciara Loftus wrote: > > This patch adds support for a new port type to the userspace > > datapath called dpdkvhostuser. > > > > A new dpdkvhostuser port will create a unix domain socket which > > when provided to QEMU is used to facilitate communication between > > the virtio-net device on the VM and the OVS port on the host. > > > > vhost-cuse ('dpdkvhost') ports are still available, and will be > > enabled if vhost-cuse support is detected in the DPDK build > > specified during compilation of the switch. Otherwise, vhost-user > > ports are enabled. > > > > v4: > > - Included helper function for the new_device callbacks to minimise > > code duplication. > > - Fixed indentation & line-wrap. > > - Simplified and corrected the processing of vhost ovs-vswitchd flags. > > > > v5: > > - Removed unnecessary strdup() > > - Fixed spacing > > > > v6: > > - Rebased to master > > > > v7: > > - Rebased to master > > > > Signed-off-by: Ciara Loftus > > --- > > Thanks for all changes. > The change log should not be part of commit message but should be > written here after "---". > > > INSTALL.DPDK.md | 174 > ++-- > > acinclude.m4| 3 + > > lib/netdev-dpdk.c | 153 ++--- > - > > lib/netdev.c| 3 +- > > vswitchd/ovs-vswitchd.c | 5 ++ > > 5 files changed, 277 insertions(+), 61 deletions(-) > > > ... > > > Following the steps above to create a bridge, you can now add DPDK vhost > > -as a port to the vswitch. > > +as a port to the vswitch. Unlike DPDK ring ports, DPDK vhost ports can > have > > +arbitrary names. > > + > > +When adding vhost ports to the switch, take care depending on which > type of > > +vhost you are using. 
> > > > -`ovs-vsctl add-port br0 dpdkvhost0 -- set Interface dpdkvhost0 > type=dpdkvhost` > > + - For vhost-user (default), the name of the port type is `dpdkvhostuser` > > + > > + ``` > > + ovs-ofctl add-port br0 vhost-user-1 -- set Interface vhost-user-1 > > + type=dpdkvhostuser > > + ``` > > > > -Unlike DPDK ring ports, DPDK vhost ports can have arbitrary names: > > + This action creates a socket located at > > + `/usr/local/var/run/openvswitch/vhost-user-1`, which you must > provide > > + to your VM on the QEMU command line. More instructions on this can > be > > + found in the next section "DPDK vhost-user VM configuration" > > + Note: If you wish for the vhost-user sockets to be created in a > > + directory other than `/usr/local/var/run/openvswitch`, you may specify > > + another location on the ovs-vswitchd command line like so: > > > > -`ovs-vsctl add-port br0 port123ABC -- set Interface port123ABC > type=dpdkvhost` > > + `./vswitchd/ovs-vswitchd --dpdk --vhost_sock_dir /my-dir -c 0x1 ...` > > > Since we are going to deprecate cuse in future release, we should > switch the type naming. vhost user should be dpdkvhost and cuse can be > dpdkvhostcuse. > > > -However, please note that when attaching userspace devices to QEMU, > the > > -name provided during the add-port operation must match the ifname > parameter > > -on the QEMU command line. > > + - For vhost-cuse, the name of the port type is `dpdkvhost` > > > > + ``` > > + ovs-ofctl add-port br0 vhost-cuse-1 -- set Interface vhost-cuse-1 > > + type=dpdkvhost > > + ``` > > + > > + When attaching vhost-cuse ports to QEMU, the name provided during > the > > + add-port operation must match the ifname parameter on the QEMU > command > > + line. More instructions on this can be found in the section "DPDK > > + vhost-cuse VM configuration" > > + > > +DPDK vhost-user VM configuration: > > +- > > +Follow the steps below to attach vhost-user port(s) to a VM. > > > > -DPDK vhost VM configuration: > > - > > +1. 
Configure sockets. > > + Pass the following parameters to QEMU to attach a vhost-user device: > > > > - vhost ports use a Linux* character device to communicate with QEMU. > > + ``` > > + -chardev socket,id=char1,path=/usr/local/var/run/openvswitch/vhost- > user-1 > > + -netdev type=vhost-user,id=mynet1,chardev=char1,vhostforce > > + -device virtio-net-pci,mac=00:00:00:00:00:01,netdev=mynet1 > > + ``` > > + > > + ...where vhost-user-1 is the name of the vhost-user port added > > + to the switch. > > + Repeat the above parameters for multiple devices, changing the > > + chardev path and id as necessary. Note that a separate and different > > + chardev path needs to be specified for each vhost-user device. For > > + example you have a second vhost-user port named 'vhost-user-2', you > > + append your QEMU command line with an additional set of parameters: > > + > > + > > + ``` > > + -chardev socket,id=char2,path=/usr/local/var/run/openvswitch/vhost- > user-2 > > + -netdev type=vhost-user,id=mynet2,chardev=char
Re: [ovs-dev] [PATCH v3] netdev-dpdk: add dpdk vhost-user ports
> On 05/11/2015 01:56 PM, Ciara Loftus wrote: > > This patch adds support for a new port type to the userspace > > datapath called dpdkvhostuser. > > > > A new dpdkvhostuser port will create a unix domain socket which > > when provided to QEMU is used to facilitate communication between > > the virtio-net device on the VM and the OVS port on the host. > > > > vhost-cuse ('dpdkvhost') ports are still available, and will be > > enabled if vhost-cuse support is detected in the DPDK build > > specified during compilation of the switch. Otherwise, vhost-user > > ports are enabled. > > > > Signed-off-by: Ciara Loftus > > --- > [...] > > diff --git a/acinclude.m4 b/acinclude.m4 > > index e9d0ed9..2873480 100644 > > --- a/acinclude.m4 > > +++ b/acinclude.m4 > > @@ -218,6 +218,9 @@ AC_DEFUN([OVS_CHECK_DPDK], [ > > DPDK_vswitchd_LDFLAGS=-Wl,--whole-archive,$DPDK_LIB,--no-whole- > archive > > AC_SUBST([DPDK_vswitchd_LDFLAGS]) > > AC_DEFINE([DPDK_NETDEV], [1], [System uses the DPDK module.]) > > + > > +OVS_GREP_IFELSE([$RTE_SDK/include/rte_config.h], [define > RTE_LIBRTE_VHOST_USER 1], > > +[], [AC_DEFINE([VHOST_CUSE], [1], [DPDK vhost-cuse > > support > enabled, vhost-user disabled.])]) > > else > > RTE_SDK= > > fi > > This isn't really needed, you could just include rte_config.h from > netdev-dpdk.c ... but maybe this is better afterall as it leaves a trace > in the build log as to which version was chosen. Will keep it in for the reason you mention above. > > [...] > > +static int > > +new_device_vhost_user(struct virtio_net *dev) > > +{ > > +struct netdev_dpdk *netdev; > > +bool exists = false; > > + > > +ovs_mutex_lock(&dpdk_mutex); > > +/* Add device to the vhost port with the same name as that passed > down. 
*/ > > +LIST_FOR_EACH(netdev, list_node, &dpdk_list) { > > +if (strncmp(dev->ifname, netdev->socket_path, IF_NAME_SZ) == 0) { > > +ovs_mutex_lock(&netdev->mutex); > > +ovsrcu_set(&netdev->virtio_dev, dev); > > +ovs_mutex_unlock(&netdev->mutex); > > +exists = true; > > +dev->flags |= VIRTIO_DEV_RUNNING; > > +/* Disable notifications. */ > > +set_irq_status(dev); > > +break; > > +} > > +} > > +ovs_mutex_unlock(&dpdk_mutex); > > + > > +if (!exists) { > > +VLOG_INFO("vHost Device '%s' (%ld) can't be added - name not > found", > > + dev->ifname, dev->device_fh); > > + > > +return -1; > > +} > > + > > +VLOG_INFO("vHost Device '%s' (%ld) has been added", > > + dev->ifname, dev->device_fh); > > +return 0; > > +} > > + > > /* > >* Remove a virtio-net device from the specific vhost port. Use dev- > >remove > >* flag to stop any more packets from being sent or received to/from a VM > and > > Sorry for missing this the last time around, but this too seems like > unwanted code duplication since the whole thing differs from the vhost > case by just that one strncmp() line. Its a little trickier than the > constructor case since the struct member to compare differs but solvable. Makes sense, I've introduced a helper function in v4. > > [...] > > +static int > > +process_vhost_flags(char* flag, char* default_val, int size, char** argv, > char** new_val) > > +{ > > + int changed = 0; > > The indentation is off here, there's a tab where other code on this > level is at four spaces. Fixed in v4. > > > + > > +/* Depending on which version of vhost is in use, process the vhost- > specific > > + * flag if it is provided on the vswitchd command line, otherwise > > resort > to > > + * a default value. > > + * > > + * For vhost-user: Process "--cuse_dev_name" to set the custom > location of > > + * the vhost-user socket(s). > > + * For vhost-cuse: Process "--vhost_sock_dir" to set the custom name > of the > > + * vhost-cuse character device. 
> > + */ > > +if (!strcmp(argv[1], flag) && > > + (strlen(argv[2]) <= size)) { > > Why the line-split? This fits easily on one line ... okay it was that > way already, but no reason not to "fix" it when moving around. Fixed in v4. > > > + > > +*new_val = strdup(argv[2]); > > + > > +VLOG_ERR("User-provided %s in use: %s", flag, *new_val); > > +changed = 1; > > +} else { > > +*new_val = default_val; > > +VLOG_INFO("No %s provided - defaulting to %s", flag, default_val); > > +} > > + > > +return changed; > > +} > > + > > int > > dpdk_init(int argc, char **argv) > > { > > int result; > > int base = 0; > > char *pragram_name = argv[0]; > > +int flag_processed = 0; > > > > if (argc < 2 || strcmp(argv[1], "--dpdk")) > > return 0; > > @@ -1869,27 +1983,17 @@ dpdk_init(int argc, char **argv) > > argc--; > > argv++; > > > > -/* If the cuse_dev_name parameter has
Re: [ovs-dev] [PATCH] netdev-dpdk: add dpdk vhost-user ports
> On Tue, May 5, 2015 at 8:58 AM, Ciara Loftus wrote: > > This patch adds support for a new port type to the userspace > > datapath called dpdkvhostuser. It adds to the existing > > infrastructure of vhost-cuse, however disables vhost-cuse ports > > as the default port type, in favour of vhost-user ports. > > vhost-cuse 'dpdkvhost' ports are still available and can be > > enabled using a configure flag, steps for which are available > > in INSTALL.DPDK.md. > > > > A new dpdkvhostuser port will create a unix domain socket which > > when provided to QEMU is used to facilitate communication between > > the virtio-net device on the VM and the OVS port on the host. > > > > Signed-off-by: Ciara Loftus > > Thanks for the patch I have couple of comments. Thanks for the feedback. Replies inline. > > > > > diff --git a/configure.ac b/configure.ac > > index 068674e..3f635b4 100644 > > --- a/configure.ac > > +++ b/configure.ac > > @@ -165,6 +165,7 @@ AC_ARG_VAR(KARCH, [Kernel Architecture String]) > > AC_SUBST(KARCH) > > OVS_CHECK_LINUX > > OVS_CHECK_DPDK > > +OVS_CHECK_VHOST_CUSE > > OVS_CHECK_PRAGMA_MESSAGE > > AC_SUBST([OVS_CFLAGS]) > > AC_SUBST([OVS_LDFLAGS]) > > I think you can parse build/.config file in configure script and check > what type of vhost is configured for the DPDK build. We already do > this for kernel configuration. You can have look at macro > OVS_CHECK_LINUX_COMPAT in acinclude.m4. This way we can avoid new > OVS > configure option. I have included something similar to this in the new version. The configure flag is gone. 
> > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index 5af15d4..54ead15 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -28,6 +28,7 @@ > > #include > > #include > > > > +#include "dirs.h" > > #include "dp-packet.h" > > #include "dpif-netdev.h" > > #include "list.h" > > @@ -101,8 +102,18 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / > ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF)) > > > > #define MAX_PKT_BURST 32 /* Max burst size for RX/TX */ > > > > -/* Character device cuse_dev_name. */ > > -char *cuse_dev_name = NULL; > > +/* For vhost-user, the path where sockets will be created. > > + * For vhost-cuse, the name of the character device. */ > > +char *vhost_dev_or_sock = NULL; > > + > > +#ifdef VHOST_CUSE > > +char vhost_flag[] = "--cuse_dev_name"; > > +char vhost_flag_default_val[] = "vhost-net"; > > +#else > > +#define VHOST_USER > > +char vhost_flag[] = "--vhost_sock_dir"; > > +char vhost_flag_default_val[PATH_MAX]; /* Initialized at runtime via > ovs_rundir */ > > +#endif > > You can get rid of all the #ifdef by refactoring existing vhost code > and defining separate netdev_class for vost_user and vhost_cuse. I've created separate netdev classes for each in the new version. One #ifdef remains, for netdev_register_provider, as only one of either vhost-cuse or vhost-user ports can be used for an instance of the switch. The rest of the #ifdefs are gone, however. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH RFC v2 1/1] netdev-dpdk: add dpdk vhost-user ports
> On 04/24/2015 04:01 PM, Flavio Leitner wrote: > > On Fri, 24 Apr 2015 14:17:17 +0300 > > Panu Matilainen wrote: > > > >> Hi, > >> > >> A few comments inline... > >> > >> On 04/21/2015 01:10 PM, Ciara Loftus wrote: > >>> This patch adds support for a new port type to the userspace > >>> datapath called dpdkvhostuser. It adds to the existing > >>> infrastructure of vhost-cuse, however disables vhost-cuse ports > >>> as the default port type, in favour of vhost-user ports. Refer > >>> to the documentation for enabling vhost-cuse ports if desired. > >>> > >>> A new dpdkvhostuser port will create a unix domain socket which > >>> when provided to QEMU is used to facilitate communication between > >>> the virtio-net device on the VM and the OVS port on the host. > >>> > >>> Signed-off-by: Ciara Loftus > > [...] > > > >>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > >>> index f69154b..deb8b83 100644 > >>> --- a/lib/netdev-dpdk.c > >>> +++ b/lib/netdev-dpdk.c > >>> @@ -101,8 +101,13 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / > >>> ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF)) > >>> > >>>#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */ > >>> > >>> +#ifdef VHOST_CUSE > >>>/* Character device cuse_dev_name. */ > >>>char *cuse_dev_name = NULL; > >>> +#else > >>> +#define VHOST_USER_PORT_SOCK_PATH "/tmp/%s" /* Socket > Location > >>> Template */ > >> > >> Using /tmp for these seems like asking for trouble to me, how about > >> somewhere /var/run? At least it should be configurable via cli, > >> similar to the cuse device name. > > > > Why using /tmp would be problematic? > > Because it opens up a whole can of worms that is best avoided since > there's no good reason to open it in this case, AFAICS. Predictable > names in a world-writable directory and all, and these are not really > temporary files in the usual sense anyway. > > Current rte_vhost_driver_register() will unconditionally nuke any file > in the path passed to it. 
I'm sure you can imagine some unwanted > scenarios if 'ovs-vsctl add-port' goes around deleting files in a > world-shared directory :) > > > > > Anyway, systemd has the RunTimeDirectory= option to make sure the > > private directory is created (under /run) when the service is > > started and that's the default RPM %{_rundir} where the sock.db, .pid > > files and other OVS files are created. It seems you could get the right > > path using ovs_rundir(). > > Right. > > - Panu - > > > > > fbl > > Thanks for the feedback, I appreciate the input. The latest revision of the patch sets the default socket location to the return value of ovs_rundir(). The user can also specify an alternate socket location using the --vhost-sock-dir flag on the ovs-vswitchd command line. Thanks, Ciara ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH RFC 0/1] dpif-netdev: Make EMC Size Configurable
> -Original Message- > From: Ben Pfaff [mailto:b...@nicira.com] > Sent: Tuesday, April 21, 2015 4:08 PM > To: Loftus, Ciara > Cc: dev@openvswitch.org > Subject: Re: [ovs-dev] [PATCH RFC 0/1] dpif-netdev: Make EMC Size > Configurable > > On Tue, Apr 21, 2015 at 01:47:12PM +0100, Ciara Loftus wrote: > > This patch allows the user to specify the number of Exact Match Cache > > entries they wish to have available for their instance of the vswitch. > > This is achieved by means of a 'configure' flag. > > > > At the moment, the default number of EMC entries is 1024, for which > > the switch should perform optimally up to this number of flows. > > However, if the user expects say roughly 2000 flows of traffic, an EMC > > with 2048 entries will perform better, albeit with a larger demand on > > memory and time spent flushing the cache. If these requirements are > > acceptable to the user, they may choose to allocate 2048 entries like so, > > and in turn receive improved performance for their specific use case: > > > > './configure --with-emcsize=11' > > > > The number of entries is calculated as 2^11 = 2048. > > configure flags are really inconvenient. Is there a reason that this > can't be made configurable at runtime? It would be better yet if the > EMC size could automatically adjust itself. Thanks for the feedback. The implementation is naturally more complex for a runtime-configurable option but I think achievable - I'll look into it. Thanks, Ciara ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH RFC 1/1] netdev-dpdk: add dpdk vhost-user ports
> This patch adds support for a new port type to the userspace datapath > called dpdkvhostuser. It adds to the existing infrastructure of > vhost-cuse, however disables vhost-cuse ports in favour of vhost-user > ports. > > A new dpdkvhostuser port will create a unix domain socket which > when > provided to QEMU is used to facilitate communication between the > virtio-net device on the VM and the OVS port. > > Signed-off-by: Ciara Loftus > --- > INSTALL.DPDK.md | 115 > > >>> > >>> ... > >>> ... > > > diff --git a/acinclude.m4 b/acinclude.m4 > index 18598b3..2113dfb 100644 > --- a/acinclude.m4 > +++ b/acinclude.m4 > @@ -224,6 +224,19 @@ AC_DEFUN([OVS_CHECK_DPDK], [ > AM_CONDITIONAL([DPDK_NETDEV], test -n "$RTE_SDK") > ]) > > +dnl OVS_CHECK_VHOST_CUSE > +dnl > +dnl Enable DPDK vhost-cuse support in favour of vhost-user > +AC_DEFUN([OVS_CHECK_VHOST_CUSE], [ > + AC_ARG_WITH(vhostcuse, > + [AC_HELP_STRING([--with-vhostcuse], > + [Enable DPDK vhost-cuse])]) > + > + if test X"$with_vhostcuse" != X; then > +AC_DEFINE([VHOST_CUSE], [1], [DPDK vhost-cuse support enabled, > vhost-user disabled.]) > + fi > +]) > + > dnl OVS_GREP_IFELSE(FILE, REGEX, [IF-MATCH], [IF-NO-MATCH]) > dnl > dnl Greps FILE for REGEX. If it matches, runs IF-MATCH, otherwise > IF-NO-MATCH. > diff --git a/configure.ac b/configure.ac > index 8d47eb9..14c4b35 100644 > >>> > >>> > >>> We need to compile-in support for both - vhost-cuse and vhost-user and > >>> use it according to port configuration. This way we can keep single > >>> executable for all use cases. > >> > >> > >> The problem is that this is a compile-time option in DPDK, librte_vhost > only > >> supports one or the other depending on how DPDK was built. > >> > >> The API looks the same in both cases, but behaves differently in that that > >> rte_vhost_driver_register() expects an absolute path for vhost-user and a > >> relative one (under /dev) for vhost-cuse. > >> > > > > ok. 
> > If you detect vhost implementation build in DPDK library at configure > > time there is no need for another OVS configure option. > > The DPDK configuration is available in rte_config.h so one can just > include that to see whether vhost support is enabled in DPDK at all + > whether its using vhost-user or -cuse. > > - Panu - This may not be entirely reliable. rte_config.h seems to only reflect the settings that are hard coded in the config/common_linuxapp file in DPDK. If the user chooses to build DPDK using flags on the command line like so: 'make install T=x86_64-ivshmem-linuxapp-gcc CONFIG_RTE_LIBRTE_VHOST=y CONFIG_RTE_BUILD_COMBINE_LIBS=y CONFIG_RTE_LIBRTE_VHOST_USER=n' ..those settings will not be reflected in the rte_config.h file. i.e. in the case above, rte_config.h still shows CONFIG_RTE_LIBRTE_VHOST_USER=y even though the user has built DPDK with vhost-user off. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH RFC 1/1] netdev-dpdk: add dpdk vhost-user ports
> On Thu, Mar 19, 2015 at 11:48 AM, Ciara Loftus > wrote: > > This patch adds support for a new port type to the userspace datapath > > called dpdkvhostuser. It adds to the existing infrastructure of > > vhost-cuse, however disables vhost-cuse ports in favour of vhost-user > > ports. > > > > A new dpdkvhostuser port will create a unix domain socket which when > > provided to QEMU is used to facilitate communication between the > > virtio-net device on the VM and the OVS port. > > > Thanks for the patch. I have pushed OVS DPDK vHost cuse patch. Once we > add support for vhost user, vhost-cuse support should be dropped. I > think we need to wait for dpdk 2.0 for vhost-user support. So can you > post rebased patch once we move to DPDK 2.0? Sure, will post a rebased patch once 2.0 is available and OVS has support for it. Thanks. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [Qestion] netdev-dpdk: dpdk vhost ports
> > > On 02/24/2015 05:13 PM, Loftus, Ciara wrote: > > > Hi Marcel, > > > > > > On 02/23/2015 12:57 PM, Marcel Apfelbaum wrote: > > >> Hi, > > > I CC-ed the developers that submitted the patch. > > > Thanks again, > > > Marcel > > > > > Hi, > > Thank you for responding to my mail! > You're welcome. > > > > > > > >> > > >> Regarding patch: [ovs-dev] [PATCH RFC v6 1/1] netdev-dpdk: add dpdk > > >> vhost ports > > >> http://openvswitch.org/pipermail/dev/2015-January/050279.html > > >> > > >> What are the Qemu versions supported?(At least the ones that were > > >> tested) > > >> (V5 of this patch was tested with QEMU versions 1.4.2 and 1.6.2) > > > This patch was tested with QEMU 1.6.2 and later 2.1.0. > > Thanks, I tried with QEMU 1.6.2 with no luck. > > > > >> > > >> The reason I ask is that I tried it with a Fedora 21 host with both > > >> QEMU 2.1.2 and QEMU 1.6.2 and I had no luck connecting the guest to > > >> the switch. > > > With QEMU 1.6.2 please make sure that you have hugepages set up and > > that you provide the > > > "-mem-path /path/to/hugepages -mem-prealloc" > > > arguments on the QEMU command line. > > Used that (I read all the documentation - a pretty good one, thanks!) > > > > > > > > Secondly, please make sure that the name of the vhost port you > > > provided > > to add-port matches the "ifname" parameter on the QEMU command line: > > > OVS: ovs-vsctl add-port br0 ovsdpdk0 -- set Interface type=dpdkvhost > > > QEMU: -netdev > > type=tap,id=net1,script=no,downscript=no,ifname=ovsdpdk0,vhost=on > > I had this one too. > > > > > > > > Thirdly, ensure that the eventfd_link.ko module is inserted. > > Checked. > > > > >> > > >> The ovsdpdk0 port always remains DOWN. > > >> Any pointers will be greatly appreciated. > > > Have you tested with traffic? Or are you relying on the config/state > > UP/DOWN to verify if the port is connected to the switch? > > I did both, used ofctl show and tried to ping between > > guest<->host and guest<->other host. 
No luck. > > > > A little description of my setup: > > I have: > > - a regular Fedora 21 host with a regular NIC connected back-to-back to: > > - a host with Fedora 21 and > > - Intel DPDK-enabled NIC > > - DPDK 1.8.0 > > - Openvswitch (latest) patched with this patch and the other 2 > > recommended patches. > > - I set br0 ip address to something like 192.168.0.100 and I am able > > to ping between > > br0 and the other regular host. Both the guest and the other host > > have ips configured. > > > > However, I am not able to ping between br0<->guest or regular host<- > > >guest. Marcel, please ensure that the firewalld service is disabled on the VM. Secondly, please ensure that you have configured routing such that pings to the guest go through the bridge, rather than the default route. > > > > I think that my problem is one of the two: > > 1. I am missing a configuration step to make the guest accessible. > > - Should ovsdpdk0 port be configured with an IP? (Sorry for my > > lack of > > knowledge...) > On the host, no. > > - When/how should ovsdpdk0 change from PORT_DOWN to PORT_UP? > This has not been fully tested, do not rely on the UP/DOWN state for this > case. > > - Any other step I am missing to complete the setup? > > > > 2. My environment has some 'pieces' that do not play well together, > > incompatible versions or such. > > - dpdk: Should I only use dpdk-1.8.0-rc4 or latest stable works? > I would recommend checking out and using the v1.8.0 tag. > > - openvswitch: did you check it against a specific version/commit, > > or latest should work? > I believe the patch will not apply to the current HEAD due to changes > made since this version was submitted. > I suggest you use this commit: 4c75aaabb15389ddc9ec76e23f09d9282648dc13 > Others may work, but I have validated against this one. > > - kernel: does the kernel version play a role on this? > I have validated for 3.17.8-300.fc21.x86_64 > > - Is any other incompatibility possible? 
> There is a DPDK issue with some kernels before 3.18 where if iommu is > enabled then traffic will fail to pass. Please make sure that if you are > using a > kernel between 3.15 and 3.17 that
Re: [ovs-dev] [Question] netdev-dpdk: dpdk vhost ports
> On 02/24/2015 05:13 PM, Loftus, Ciara wrote: > > Hi Marcel, > > > > On 02/23/2015 12:57 PM, Marcel Apfelbaum wrote: > >> Hi, > > I CC-ed the developers that submitted the patch. > > Thanks again, > > Marcel > > > Hi, > Thank you for responding to my mail! You're welcome. > > > > >> > >> Regarding patch: [ovs-dev] [PATCH RFC v6 1/1] netdev-dpdk: add dpdk > >> vhost ports > >> http://openvswitch.org/pipermail/dev/2015-January/050279.html > >> > >> What are the Qemu versions supported?(At least the ones that were > >> tested) > >> (V5 of this patch was tested with QEMU versions 1.4.2 and 1.6.2) > > This patch was tested with QEMU 1.6.2 and later 2.1.0. > Thanks, I tried with QEMU 1.6.2 with no luck. > > >> > >> The reason I ask is that I tried it with a Fedora 21 host with both > >> QEMU 2.1.2 and QEMU 1.6.2 and I had no luck connecting the guest to > >> the switch. > > With QEMU 1.6.2 please make sure that you have hugepages set up and > that you provide the > > "-mem-path /path/to/hugepages -mem-prealloc" > > arguments on the QEMU command line. > Used that (I read all the documentation - a pretty good one, thanks!) > > > > > Secondly, please make sure that the name of the vhost port you provided > to add-port matches the "ifname" parameter on the QEMU command line: > > OVS: ovs-vsctl add-port br0 ovsdpdk0 -- set Interface type=dpdkvhost > > QEMU: -netdev > type=tap,id=net1,script=no,downscript=no,ifname=ovsdpdk0,vhost=on > I had this one too. > > > > > Thirdly, ensure that the eventfd_link.ko module is inserted. > Checked. > > >> > >> The ovsdpdk0 port always remains DOWN. > >> Any pointers will be greatly appreciated. > > Have you tested with traffic? Or are you relying on the config/state > UP/DOWN to verify if the port is connected to the switch? > I did both, used ofctl show and tried to ping between guest<->host > and guest<->other host. No luck. 
> > A little description of my setup: > I have: > - a regular Fedora 21 host with a regular NIC connected back-to-back to: > - a host with Fedora 21 and > - Intel DPDK-enabled NIC > - DPDK 1.8.0 > - Openvswitch (latest) patched with this patch and the other 2 > recommended patches. > - I set br0 ip address to something like 192.168.0.100 and I am able to ping > between > br0 and the other regular host. Both the guest and the other host have ips > configured. > > However, I am not able to ping between br0<->guest or regular host<- > >guest. > > I think that my problem is one of the two: > 1. I am missing a configuration step to make the guest accessible. > - Should ovsdpdk0 port be configured with an IP? (Sorry for my lack of > knowledge...) On the host, no. > - When/how should ovsdpdk0 change from PORT_DOWN to PORT_UP? This has not been fully tested; do not rely on the UP/DOWN state for this case. > - Any other step I am missing to complete the setup? > > 2. My environment has some 'pieces' that do not play well together, > incompatible versions or such. > - dpdk: Should I only use dpdk-1.8.0-rc4 or latest stable works? I would recommend checking out and using the v1.8.0 tag. > - openvswitch: did you check it against a specific version/commit, or > latest > should work? I believe the patch will not apply to the current HEAD due to changes made since this version was submitted. I suggest you use this commit: 4c75aaabb15389ddc9ec76e23f09d9282648dc13 Others may work, but I have validated against this one. > - kernel: does the kernel version play a role on this? I have validated for 3.17.8-300.fc21.x86_64 > - Is any other incompatibility possible? There is a DPDK issue with some kernels before 3.18 where if iommu is enabled then traffic will fail to pass. Please make sure that if you are using a kernel between 3.15 and 3.17 that iommu is not enabled on your system. 
> > Are there any logs that could help me understand what is happening: > - ovsdpdk0 configuration > - ovsdpdk0 connection to vhost-net > - ... You can set CONFIG_RTE_LIBRTE_VHOST_DEBUG=y in DPDK which will provide more vhost debugging logs, even printing packet contents that are received on the device. > > Your help is very much appreciated, > Marcel > > >> > >> Thank you in advance, > >> Marcel > >> > >> > >> ___ > >> dev mailing list > >> dev@openvswitch.org > >> http://openvswitch.org/mailman/l
Re: [ovs-dev] [Question] netdev-dpdk: dpdk vhost ports
Hi Marcel, On 02/23/2015 12:57 PM, Marcel Apfelbaum wrote: > Hi, I CC-ed the developers that submitted the patch. Thanks again, Marcel > > Regarding patch: [ovs-dev] [PATCH RFC v6 1/1] netdev-dpdk: add dpdk > vhost ports > http://openvswitch.org/pipermail/dev/2015-January/050279.html > > What are the Qemu versions supported?(At least the ones that were > tested) > (V5 of this patch was tested with QEMU versions 1.4.2 and 1.6.2) This patch was tested with QEMU 1.6.2 and later 2.1.0. > > The reason I ask is that I tried it with a Fedora 21 host with both > QEMU 2.1.2 and QEMU 1.6.2 and I had no luck connecting the guest to > the switch. With QEMU 1.6.2 please make sure that you have hugepages set up and that you provide the "-mem-path /path/to/hugepages -mem-prealloc" arguments on the QEMU command line. Secondly, please make sure that the name of the vhost port you provided to add-port matches the "ifname" parameter on the QEMU command line: OVS: ovs-vsctl add-port br0 ovsdpdk0 -- set Interface type=dpdkvhost QEMU: -netdev type=tap,id=net1,script=no,downscript=no,ifname=ovsdpdk0,vhost=on Thirdly, ensure that the eventfd_link.ko module is inserted. > > The ovsdpdk0 port always remains DOWN. > Any pointers will be greatly appreciated. Have you tested with traffic? Or are you relying on the config/state UP/DOWN to verify if the port is connected to the switch? > > Thank you in advance, > Marcel > > > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev -- Intel Shannon Limited Registered in Ireland Registered Office: Collinstown Industrial Park, Leixlip, County Kildare Registered Number: 308263 Business address: Dromore House, East Park, Shannon, Co. Clare This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies. 
___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 2/2] miniflow: Use 64-bit data.
Hi, After running some performance tests on the latest master, it appears that this commit has caused netdev DPDK performance to drop significantly (by > 50 %). Has anybody else seen this? Regards, Ciara -Original Message- From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Jarno Rajahalme Sent: Wednesday, December 17, 2014 6:31 PM To: dev@openvswitch.org Subject: [ovs-dev] [PATCH 2/2] miniflow: Use 64-bit data. So far the compressed flow data in struct miniflow has been in 32-bit words with a 63-bit map, allowing for a maximum size of struct flow of 252 bytes. With the forthcoming Geneve options this is not sufficient any more. This patch solves the problem by changing the miniflow data to 64-bit words, doubling the flow max size to 504 bytes. Since the word size is doubled, there is some loss in compression efficiency. To counter this some of the flow fields have been reordered to keep related fields together (e.g., the source and destination IP addresses share the same 64-bit word). This change should speed up flow data processing on 64-bit CPUs, which may help counterbalance the impact of making the struct flow bigger in the future. Classifier lookup stage boundaries are also changed to 64-bit alignment, as the current algorithm depends on each miniflow word to not be split between ranges. This has resulted in new padding (part of the 'mpls_lse' field). The 'dp_hash' field is also moved to packet metadata to eliminate otherwise needed padding there. This allows the L4 to fit into one 64-bit word, and also makes matches on 'dp_hash' more efficient as misses can be found already on stage 1. 
Signed-off-by: Jarno Rajahalme Summary: --- lib/classifier-private.h | 60 +++--- lib/classifier.c | 72 lib/dpif-netdev.c | 48 ++--- lib/flow.c| 402 +++-- lib/flow.h| 270 +++ lib/match.c | 10 +- lib/nx-match.c|2 +- lib/odp-util.h|2 +- lib/ofp-util.c|2 +- lib/tnl-ports.c |2 +- ofproto/ofproto-dpif-upcall.c | 10 +- ofproto/ofproto-dpif-xlate.c |2 +- ofproto/ofproto.c |2 +- tests/ofproto-dpif.at |2 +- tests/test-classifier.c | 23 +-- 15 files changed, 499 insertions(+), 410 deletions(-) diff --git a/lib/classifier-private.h b/lib/classifier-private.h index 17eed2c..cd64fed 100644 --- a/lib/classifier-private.h +++ b/lib/classifier-private.h @@ -42,7 +42,7 @@ struct cls_subtable { /* These fields are accessed by readers who care about wildcarding. */ const tag_type tag; /* Tag generated from mask for partitioning. */ const uint8_t n_indices; /* How many indices to use. */ -const uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 segment boundaries. */ +const uint8_t index_ofs[CLS_MAX_INDICES]; /* u64 segment boundaries. */ unsigned int trie_plen[CLS_MAX_TRIES]; /* Trie prefix length in 'mask' * (runtime configurable). 
*/ const int ports_mask_len; @@ -112,7 +112,7 @@ miniflow_get_map_in_range(const struct miniflow *miniflow, *offset = count_1bits(map & msk); map &= ~msk; } -if (end < FLOW_U32S) { +if (end < FLOW_U64S) { uint64_t msk = (UINT64_C(1) << end) - 1; /* 'end' LSBs set */ map &= msk; } @@ -128,18 +128,18 @@ static inline uint32_t flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask, uint32_t basis) { -const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks); -const uint32_t *flow_u32 = (const uint32_t *)flow; -const uint32_t *p = mask_values; +const uint64_t *mask_values = miniflow_get_values(&mask->masks); +const uint64_t *flow_u64 = (const uint64_t *)flow; +const uint64_t *p = mask_values; uint32_t hash; int idx; hash = basis; MAP_FOR_EACH_INDEX(idx, mask->masks.map) { -hash = hash_add(hash, flow_u32[idx] & *p++); +hash = hash_add64(hash, flow_u64[idx] & *p++); } -return hash_finish(hash, (p - mask_values) * 4); +return hash_finish(hash, (p - mask_values) * 8); } /* Returns a hash value for the bits of 'flow' where there are 1-bits in @@ -151,16 +151,16 @@ static inline uint32_t miniflow_hash_in_minimask(const struct miniflow *flow, const struct minimask *mask, uint32_t basis) { -const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks); -const uint32_t *p = mask_values; +const uint64_t *mask_values = miniflow_get_values(&mask->masks); +const uint64_t *p = mask_values; uint32_t hash = basis; -uint32_t flow_u32; +uint64_t flow_u64; -MINIFLOW_FOR_EACH_IN_MAP(flow_u32, flow, mask->masks.map) { -hash = hash_add(hash, fl
Re: [ovs-dev] [PATCH RFC 2/2] openvswitch: Userspace tunneling.
Hi, I have been validating the userspace tunneling patches for DPDK and have encountered some problems. My set-up is as follows: 192.168.1.1/24 192.168.1.2/24 ++++ | br1 | | br1 | ++++ | vxlan1| | vxlan1 | ++++ | tap1 (192.168.1.10) | | tap1(192.168.1.20) | ++++ || || || || 172.168.1.1/24172.168.1.2/24 +--++---+ |br0 ||br0 | +--++---+ |dpdk0 |--|dpdk0 | +--++---+ Host A Host B The dpdk devices have been connected back to back and the tap devices are used by VMs on either host. The main issue I am encountering is the vswitchd appears to be crashing and I am unable to root cause the issue. One circumstance where the daemon crashes is if VTEP A is pinged from Host B and vice versa. There have been many other instances where the vswitchd crashes however I have been unable to trace the cause. Secondly, the route added by tnl/route/add seems to silently disappear a couple of seconds after first adding it, as seen in tnl/route/show. However, once it's re-added, it appears to remain. Kind Regards, Ciara -Original Message- From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Jarno Rajahalme Sent: Tuesday, October 14, 2014 1:10 AM To: Pravin Shelar Cc: dev@openvswitch.org Subject: Re: [ovs-dev] [PATCH RFC 2/2] openvswitch: Userspace tunneling. On Oct 13, 2014, at 2:32 PM, Pravin Shelar wrote: >>> +case OVS_ACTION_ATTR_TUNNEL_PUSH: >>> +if (*depth < MAX_RECIRC_DEPTH) { >>> +struct dpif_packet *tnl_pkt[NETDEV_MAX_RX_BATCH]; >>> +int err; >>> + >>> +if (may_steal) { >>> +dp_netdev_clone_pkt_batch(tnl_pkt, packets, cnt); >>> +packets = tnl_pkt; >>> +} >> >> Should this be the reverse? Clone if can NOT take the packets? > right, > >>> + >>> +err = odp_push_tunnel_action(dp, a, packets, cnt); >>> +if (err) { >>> +dp_netdev_drop_packets(tnl_pkt, cnt, may_steal); >>> +break; >>> +} >>> + >>> +(*depth)++; >>> +dp_netdev_input(pmd, packets, cnt); >>> +(*depth)--; >>> +return; >>> +} >> >> Should “break” here. > packets are already consumed so we can not break here. 
> Do you really intend to fall through to the TUNNEL_POP case? Jarno >>> + >>> +case OVS_ACTION_ATTR_TUNNEL_POP: >>> +if (*depth >= MAX_RECIRC_DEPTH) { >>> +break; >>> +} >>> + >>> +p = ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev -- Intel Shannon Limited Registered in Ireland Registered Office: Collinstown Industrial Park, Leixlip, County Kildare Registered Number: 308263 Business address: Dromore House, East Park, Shannon, Co. Clare This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev