Currently, all of the PMD netdevs can only have the same number of rx queues, which is specified in other_config:n-dpdk-rxqs.
Fix that by introducing of new option for PMD interfaces: 'n_rxq', which specifies the maximum number of rx queues to be created for this interface. Example: ovs-vsctl set Interface dpdk0 options:n_rxq=8 Old 'other_config:n-dpdk-rxqs' deleted. Signed-off-by: Ilya Maximets <i.maxim...@samsung.com> Acked-by: Ben Pfaff <b...@ovn.org> --- Version 3: * Added info to NEWS. * Fixed INSTALL.DPDK.md * Fixed checking 'netdev_n_rxq() != netdev_requested_n_rxq()' Version 2: * Changed wrong comment in struct netdev as suggested by Ben Pfaff. INSTALL.DPDK.md | 4 ++-- NEWS | 4 ++++ lib/dpif-netdev.c | 51 +++++++++++++++++++++++++++------------------- lib/dpif-provider.h | 8 +++----- lib/dpif.c | 5 ++--- lib/dpif.h | 3 +-- lib/netdev-dpdk.c | 26 ++++++++++++++++++----- lib/netdev-provider.h | 6 +++++- lib/netdev.c | 7 +++++++ lib/netdev.h | 1 + ofproto/ofproto-dpif.c | 2 +- ofproto/ofproto-provider.h | 3 --- ofproto/ofproto.c | 7 ------- ofproto/ofproto.h | 1 - vswitchd/bridge.c | 2 -- vswitchd/vswitch.xml | 24 ++++++++++++++-------- 16 files changed, 92 insertions(+), 62 deletions(-) diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md index 96b686c..191a2d9 100644 --- a/INSTALL.DPDK.md +++ b/INSTALL.DPDK.md @@ -294,9 +294,9 @@ Performance Tuning: 3. DPDK port Rx Queues - `ovs-vsctl set Open_vSwitch . other_config:n-dpdk-rxqs=<integer>` + `ovs-vsctl set Interface <DPDK interface> options:n_rxq=<integer>` - The command above sets the number of rx queues for each DPDK interface. + The command above sets the number of rx queues for DPDK interface. The rx queues are assigned to pmd threads on the same NUMA node in a round-robin fashion. For more information, please refer to the Open_vSwitch TABLE section in diff --git a/NEWS b/NEWS index 5c18867..11f7753 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,10 @@ Post-v2.5.0 * OpenFlow 1.4+ OFPMP_QUEUE_DESC is now supported. - ovs-ofctl: * queue-get-config command now allows a queue ID to be specified. + - DPDK: + * New option "n_rxq" for PMD interfaces. + Old 'other_config:n-dpdk-rxqs' is no longer supported. + v2.5.0 - xx xxx xxxx --------------------- diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index cd72e62..018abdc 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -221,9 +221,7 @@ struct dp_netdev { * 'struct dp_netdev_pmd_thread' in 'per_pmd_key'. */ ovsthread_key_t per_pmd_key; - /* Number of rx queues for each dpdk interface and the cpu mask - * for pin of pmd threads. */ - size_t n_dpdk_rxqs; + /* Cpu mask for pin of pmd threads. */ char *pmd_cmask; uint64_t last_tnl_conf_seq; }; @@ -254,6 +252,8 @@ struct dp_netdev_port { struct netdev_rxq **rxq; struct ovs_refcount ref_cnt; char *type; /* Port type as requested by user. */ + int latest_requested_n_rxq; /* Latest requested from netdev number + of rx queues. */ }; /* Contained by struct dp_netdev_flow's 'stats' member. */ @@ -847,7 +847,6 @@ create_dp_netdev(const char *name, const struct dpif_class *class, ovsthread_key_create(&dp->per_pmd_key, NULL); dp_netdev_set_nonpmd(dp); - dp->n_dpdk_rxqs = NR_QUEUE; ovs_mutex_lock(&dp->port_mutex); error = do_add_port(dp, name, "internal", ODPP_LOCAL); @@ -1086,7 +1085,8 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type, /* There can only be ovs_numa_get_n_cores() pmd threads, * so creates a txq for each, and one extra for the non * pmd threads. */ - error = netdev_set_multiq(netdev, n_cores + 1, dp->n_dpdk_rxqs); + error = netdev_set_multiq(netdev, n_cores + 1, + netdev_requested_n_rxq(netdev)); if (error && (error != EOPNOTSUPP)) { VLOG_ERR("%s, cannot set multiq", devname); return errno; @@ -1097,6 +1097,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type, port->netdev = netdev; port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev)); port->type = xstrdup(type); + port->latest_requested_n_rxq = netdev_requested_n_rxq(netdev); for (i = 0; i < netdev_n_rxq(netdev); i++) { error = netdev_rxq_open(netdev, &port->rxq[i], i); if (error @@ -2360,32 +2361,42 @@ dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) /* Returns true if the configuration for rx queues or cpu mask * is changed. */ static bool -pmd_config_changed(const struct dp_netdev *dp, size_t rxqs, const char *cmask) +pmd_config_changed(const struct dp_netdev *dp, const char *cmask) { - if (dp->n_dpdk_rxqs != rxqs) { - return true; - } else { - if (dp->pmd_cmask != NULL && cmask != NULL) { - return strcmp(dp->pmd_cmask, cmask); - } else { - return (dp->pmd_cmask != NULL || cmask != NULL); + struct dp_netdev_port *port; + + CMAP_FOR_EACH (port, node, &dp->ports) { + struct netdev *netdev = port->netdev; + int requested_n_rxq = netdev_requested_n_rxq(netdev); + if (netdev_is_pmd(netdev) + && port->latest_requested_n_rxq != requested_n_rxq) { + return true; } } + + if (dp->pmd_cmask != NULL && cmask != NULL) { + return strcmp(dp->pmd_cmask, cmask); + } else { + return (dp->pmd_cmask != NULL || cmask != NULL); + } } /* Resets pmd threads if the configuration for 'rxq's or cpu mask changes. */ static int -dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask) +dpif_netdev_pmd_set(struct dpif *dpif, const char *cmask) { struct dp_netdev *dp = get_dp_netdev(dpif); - if (pmd_config_changed(dp, n_rxqs, cmask)) { + if (pmd_config_changed(dp, cmask)) { struct dp_netdev_port *port; dp_netdev_destroy_all_pmds(dp); CMAP_FOR_EACH (port, node, &dp->ports) { - if (netdev_is_pmd(port->netdev)) { + struct netdev *netdev = port->netdev; + int requested_n_rxq = netdev_requested_n_rxq(netdev); + if (netdev_is_pmd(port->netdev) + && port->latest_requested_n_rxq != requested_n_rxq) { int i, err; /* Closes the existing 'rxq's. */ @@ -2397,14 +2408,14 @@ dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask) /* Sets the new rx queue config. */ err = netdev_set_multiq(port->netdev, ovs_numa_get_n_cores() + 1, - n_rxqs); + requested_n_rxq); if (err && (err != EOPNOTSUPP)) { VLOG_ERR("Failed to set dpdk interface %s rx_queue to:" " %u", netdev_get_name(port->netdev), - n_rxqs); + requested_n_rxq); return err; } - + port->latest_requested_n_rxq = requested_n_rxq; /* If the set_multiq() above succeeds, reopens the 'rxq's. */ port->rxq = xrealloc(port->rxq, sizeof *port->rxq * netdev_n_rxq(port->netdev)); @@ -2413,8 +2424,6 @@ dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask) } } } - dp->n_dpdk_rxqs = n_rxqs; - /* Reconfigures the cpu mask. */ ovs_numa_set_cpu_mask(cmask); free(dp->pmd_cmask); diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index a9844be..fbd370f 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -318,11 +318,9 @@ struct dpif_class { int (*handlers_set)(struct dpif *dpif, uint32_t n_handlers); /* If 'dpif' creates its own I/O polling threads, refreshes poll threads - * configuration. 'n_rxqs' configures the number of rx_queues, which - * are distributed among threads. 'cmask' configures the cpu mask - * for setting the polling threads' cpu affinity. */ - int (*poll_threads_set)(struct dpif *dpif, unsigned int n_rxqs, - const char *cmask); + * configuration. 'cmask' configures the cpu mask for setting the polling + * threads' cpu affinity. */ + int (*poll_threads_set)(struct dpif *dpif, const char *cmask); /* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a * priority value used for setting packet priority. */ diff --git a/lib/dpif.c b/lib/dpif.c index 38e40ba..81cbe11 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1406,13 +1406,12 @@ dpif_print_packet(struct dpif *dpif, struct dpif_upcall *upcall) /* If 'dpif' creates its own I/O polling threads, refreshes poll threads * configuration. */ int -dpif_poll_threads_set(struct dpif *dpif, unsigned int n_rxqs, - const char *cmask) +dpif_poll_threads_set(struct dpif *dpif, const char *cmask) { int error = 0; if (dpif->dpif_class->poll_threads_set) { - error = dpif->dpif_class->poll_threads_set(dpif, n_rxqs, cmask); + error = dpif->dpif_class->poll_threads_set(dpif, cmask); if (error) { log_operation(dpif, "poll_threads_set", error); } diff --git a/lib/dpif.h b/lib/dpif.h index 50174ee..97d5d06 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -836,8 +836,7 @@ void dpif_register_upcall_cb(struct dpif *, upcall_callback *, void *aux); int dpif_recv_set(struct dpif *, bool enable); int dpif_handlers_set(struct dpif *, uint32_t n_handlers); -int dpif_poll_threads_set(struct dpif *, unsigned int n_rxqs, - const char *cmask); +int dpif_poll_threads_set(struct dpif *, const char *cmask); int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *, struct ofpbuf *); void dpif_recv_purge(struct dpif *); diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index de7e488..69bdfcc 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -616,6 +616,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no, netdev_->n_txq = NR_QUEUE; netdev_->n_rxq = NR_QUEUE; + netdev_->requested_n_rxq = NR_QUEUE; netdev->real_n_txq = NR_QUEUE; if (type == DPDK_DEV_ETH) { @@ -768,14 +769,15 @@ netdev_dpdk_dealloc(struct netdev *netdev_) } static int -netdev_dpdk_get_config(const struct netdev *netdev_, struct smap *args) +netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args) { - struct netdev_dpdk *dev = netdev_dpdk_cast(netdev_); + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); ovs_mutex_lock(&dev->mutex); - smap_add_format(args, "configured_rx_queues", "%d", netdev_->n_rxq); - smap_add_format(args, "requested_tx_queues", "%d", netdev_->n_txq); + smap_add_format(args, "requested_rx_queues", "%d", netdev->requested_n_rxq); + smap_add_format(args, "configured_rx_queues", "%d", netdev->n_rxq); + smap_add_format(args, "requested_tx_queues", "%d", netdev->n_txq); smap_add_format(args, "configured_tx_queues", "%d", dev->real_n_txq); ovs_mutex_unlock(&dev->mutex); @@ -783,6 +785,20 @@ netdev_dpdk_get_config(const struct netdev *netdev_, struct smap *args) } static int +netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args) +{ + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + + ovs_mutex_lock(&dev->mutex); + netdev->requested_n_rxq = MAX(smap_get_int(args, "n_rxq", + netdev->requested_n_rxq), 1); + netdev_change_seq_changed(netdev); + ovs_mutex_unlock(&dev->mutex); + + return 0; +} + +static int netdev_dpdk_get_numa_id(const struct netdev *netdev_) { struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); @@ -2053,7 +2069,7 @@ unlock_dpdk: DESTRUCT, \ netdev_dpdk_dealloc, \ netdev_dpdk_get_config, \ - NULL, /* netdev_dpdk_set_config */ \ + netdev_dpdk_set_config, \ NULL, /* get_tunnel_config */ \ NULL, /* build header */ \ NULL, /* push header */ \ diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index a33bb3b..d324ffc 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -52,9 +52,13 @@ struct netdev { * 'netdev''s flags, features, ethernet address, or carrier changes. */ uint64_t change_seq; - /* The following are protected by 'netdev_mutex' (internal to netdev.c). */ + /* The core netdev code initializes these at netdev construction and only + * provide read-only access to its client. Netdev implementations may + * modify them. */ int n_txq; int n_rxq; + /* Number of rx queues requested by user. */ + int requested_n_rxq; int ref_cnt; /* Times this devices was opened. */ struct shash_node *node; /* Pointer to element in global map. */ struct ovs_list saved_flags_list; /* Contains "struct netdev_saved_flags". */ diff --git a/lib/netdev.c b/lib/netdev.c index e3b70b1..c250c93 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -106,6 +106,12 @@ netdev_n_rxq(const struct netdev *netdev) return netdev->n_rxq; } +int +netdev_requested_n_rxq(const struct netdev *netdev) +{ + return netdev->requested_n_rxq; +} + bool netdev_is_pmd(const struct netdev *netdev) { @@ -376,6 +382,7 @@ netdev_open(const char *name, const char *type, struct netdev **netdevp) /* By default enable one tx and rx queue per netdev. */ netdev->n_txq = netdev->netdev_class->send ? 1 : 0; netdev->n_rxq = netdev->netdev_class->rxq_alloc ? 1 : 0; + netdev->requested_n_rxq = netdev->n_rxq; list_init(&netdev->saved_flags_list); diff --git a/lib/netdev.h b/lib/netdev.h index 622e2ae..8a7f680 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -142,6 +142,7 @@ bool netdev_is_reserved_name(const char *name); int netdev_n_txq(const struct netdev *netdev); int netdev_n_rxq(const struct netdev *netdev); +int netdev_requested_n_rxq(const struct netdev *netdev); bool netdev_is_pmd(const struct netdev *netdev); /* Open and close. */ diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 89e06aa..904cc2a 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -564,7 +564,7 @@ type_run(const char *type) udpif_set_threads(backer->udpif, n_handlers, n_revalidators); } - dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, pmd_cpu_mask); + dpif_poll_threads_set(backer->dpif, pmd_cpu_mask); if (backer->need_revalidate) { struct ofproto_dpif *ofproto; diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index b6aac0a..3ba97d0 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -482,9 +482,6 @@ extern unsigned ofproto_max_idle; * ofproto-dpif implementation. */ extern size_t n_handlers, n_revalidators; -/* Number of rx queues to be created for each dpdk interface. */ -extern size_t n_dpdk_rxqs; - /* Cpu mask for pmd threads. */ extern char *pmd_cpu_mask; diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 3faf42a..369da0a 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -337,7 +337,6 @@ unsigned ofproto_flow_limit = OFPROTO_FLOW_LIMIT_DEFAULT; unsigned ofproto_max_idle = OFPROTO_MAX_IDLE_DEFAULT; size_t n_handlers, n_revalidators; -size_t n_dpdk_rxqs; char *pmd_cpu_mask; /* Map from datapath name to struct ofproto, for use by unixctl commands. */ @@ -781,12 +780,6 @@ ofproto_port_set_mcast_snooping(struct ofproto *ofproto, void *aux, } void -ofproto_set_n_dpdk_rxqs(int n_rxqs) -{ - n_dpdk_rxqs = MAX(n_rxqs, 0); -} - -void ofproto_set_cpu_mask(const char *cmask) { free(pmd_cpu_mask); diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index 7504027..b99f0cd 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -316,7 +316,6 @@ int ofproto_set_mcast_snooping(struct ofproto *ofproto, int ofproto_port_set_mcast_snooping(struct ofproto *ofproto, void *aux, const struct ofproto_mcast_snooping_port_settings *s); void ofproto_set_threads(int n_handlers, int n_revalidators); -void ofproto_set_n_dpdk_rxqs(int n_rxqs); void ofproto_set_cpu_mask(const char *cmask); void ofproto_set_dp_desc(struct ofproto *, const char *dp_desc); int ofproto_set_snoops(struct ofproto *, const struct sset *snoops); diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index f8afe55..7dfdaa3 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -581,8 +581,6 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) OFPROTO_FLOW_LIMIT_DEFAULT)); ofproto_set_max_idle(smap_get_int(&ovs_cfg->other_config, "max-idle", OFPROTO_MAX_IDLE_DEFAULT)); - ofproto_set_n_dpdk_rxqs(smap_get_int(&ovs_cfg->other_config, - "n-dpdk-rxqs", 0)); ofproto_set_cpu_mask(smap_get(&ovs_cfg->other_config, "pmd-cpu-mask")); ofproto_set_threads( diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index ce0dbc1..95e18e1 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -167,15 +167,6 @@ </p> </column> - <column name="other_config" key="n-dpdk-rxqs" - type='{"type": "integer", "minInteger": 1}'> - <p> - Specifies the maximum number of rx queues to be created for each dpdk - interface. If not specified or specified to 0, one rx queue will - be created for each dpdk interface by default. - </p> - </column> - <column name="other_config" key="pmd-cpu-mask"> <p> Specifies CPU mask for setting the cpu affinity of PMD (Poll @@ -2191,6 +2182,21 @@ </column> </group> + <group title="PMD (Poll Mode Driver) Options"> + <p> + Only PMD netdevs support these options. + </p> + + <column name="options" key="n_rxqs" + type='{"type": "integer", "minInteger": 1}'> + <p> + Specifies the maximum number of rx queues to be created for PMD + netdev. If not specified or specified to 0, one rx queue will + be created by default. + </p> + </column> + </group> + <group title="Interface Status"> <p> Status information about interfaces attached to bridges, updated every -- 2.5.0 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev