Hi Aaron,
thanks (again!) for this patch. Few comments:
* As I mentioned on my previous round of review, I don't think it's necessary
to pass the whole Open_vSwitch table to dpdk_init(). I.e., instead of doing
dpdk_init(&cfg);
I'd prefer
if (cfg) {
dpdk_init(&cfg->other_config);
}
This way we don't have to include "vswitch-idl.h".
* I suggested 'dpdk-mem-channels', because I think people who want to use that
will be comfortable passing '-n' in dpdk-extra. What do you think? Is there
any reason why you think it's worth keeping?
* Sorry for not noticing this before: it seems that netdev_dpdk_register() now
always registers the netdev classes, even though they cannot be created. The
registered classes end up in the database in the iface_type column of the
Open_vSwitch table, so controllers might think that they're available. I think
we should register the classes only when DPDK is initialized.
Two minor nits inline,
Thanks
On 26/04/2016 12:42, "Aaron Conole" <[email protected]> wrote:
>Existing DPDK integration is provided by use of command line options which
>must be split out and passed to librte in a special manner. However, this
>forces any configuration to be passed by way of a special DPDK flag, and
>interferes with ovs+dpdk packaging solutions.
>
>This commit delays dpdk initialization until after the OVS database
>connection is established, at which point ovs initializes librte. It
>pulls all of the config data from the OVS database, and assembles a
>new argv/argc pair to be passed along.
>
>Signed-off-by: Aaron Conole <[email protected]>
>Acked-by: Kevin Traynor <[email protected]>
>---
>Previous: http://openvswitch.org/pipermail/dev/2016-April/069028.html
>
>v12:
>* Squashed NEWS change into this commit
>
> FAQ.md | 6 +-
> INSTALL.DPDK.md | 82 +++++++++---
> NEWS | 5 +
> lib/automake.mk | 4 +
> lib/netdev-dpdk.c | 304 +++++++++++++++++++++++++++++++++------------
> lib/netdev-dpdk.h | 14 +--
> lib/netdev-nodpdk.c | 21 ++++
> tests/ofproto-macros.at | 3 +-
> utilities/ovs-dev.py | 13 +-
> vswitchd/bridge.c | 3 +
> vswitchd/ovs-vswitchd.8.in | 6 +-
> vswitchd/ovs-vswitchd.c | 27 +---
> vswitchd/vswitch.xml | 120 ++++++++++++++++++
> 13 files changed, 463 insertions(+), 145 deletions(-)
> create mode 100644 lib/netdev-nodpdk.c
>
[...]
>
>@@ -2732,22 +2734,20 @@ static const struct dpdk_qos_ops egress_policer_ops = {
>
> static int
> process_vhost_flags(char *flag, char *default_val, int size,
>- char **argv, char **new_val)
>+ const struct ovsrec_open_vswitch *ovs_cfg,
>+ char **new_val)
> {
>+ const char *val;
> int changed = 0;
>
>+ val = smap_get(&ovs_cfg->other_config, flag);
>+
> /* Depending on which version of vhost is in use, process the
> vhost-specific
>- * flag if it is provided on the vswitchd command line, otherwise resort
>to
>- * a default value.
>- *
>- * For vhost-user: Process "-vhost_sock_dir" to set the custom location of
>- * the vhost-user socket(s).
>- * For vhost-cuse: Process "-cuse_dev_name" to set the custom name of the
>- * vhost-cuse character device.
>+ * flag if it is provided, otherwise resort to default value.
> */
>- if (!strcmp(argv[1], flag) && (strlen(argv[2]) <= size)) {
>+ if (val && (strlen(val) <= size)) {
> changed = 1;
>- *new_val = xstrdup(argv[2]);
>+ *new_val = xstrdup(val);
> VLOG_INFO("User-provided %s in use: %s", flag, *new_val);
> } else {
> VLOG_INFO("No %s provided - defaulting to %s", flag, default_val);
>@@ -2757,68 +2757,186 @@ process_vhost_flags(char *flag, char *default_val,
>int size,
> return changed;
> }
>
>-int
>-dpdk_init(int argc, char **argv)
>+static char **
>+grow_argv(char ***argv, size_t cur_siz, size_t grow_by)
> {
>- int result;
>- int base = 0;
>- char *pragram_name = argv[0];
>- int err;
>- int isset;
>- cpu_set_t cpuset;
>+ return xrealloc(*argv, sizeof(char *) * (cur_siz + grow_by));
>+}
>
>- if (argc < 2 || strcmp(argv[1], "--dpdk"))
>- return 0;
>+static void
>+dpdk_option_extend(char ***argv, int argc, const char *option,
>+ const char *value)
>+{
>+ char **newargv = grow_argv(argv, argc, 2);
>+ *argv = newargv;
>+ newargv[argc] = xstrdup(option);
>+ newargv[argc+1] = xstrdup(value);
>+}
>
>- /* Remove the --dpdk argument from arg list.*/
>- argc--;
>- argv++;
>+static int
>+construct_dpdk_options(const struct ovsrec_open_vswitch *ovs_cfg,
>+ char ***argv, const int initial_size)
>+{
>+ struct dpdk_options_map {
>+ const char *ovs_configuration;
>+ const char *dpdk_option;
>+ bool default_enabled;
>+ const char *default_value;
>+ } opts[] = {
>+ {"dpdk-lcore-mask", "-c", false, NULL},
>+ {"dpdk-mem-channels", "-n", false, NULL},
>+ {"dpdk-hugepage-dir", "--huge-dir", false, NULL},
>+ };
>+
>+ int i, ret = initial_size;
>+
>+ /*First, construct from the flat-options (non-mutex)*/
>+ for (i = 0; i < ARRAY_SIZE(opts); ++i) {
>+ const char *lookup = smap_get(&ovs_cfg->other_config,
>+ opts[i].ovs_configuration);
>+ if (!lookup && opts[i].default_enabled) {
>+ lookup = opts[i].default_value;
>+ }
>
>- /* Reject --user option */
>- int i;
>- for (i = 0; i < argc; i++) {
>- if (!strcmp(argv[i], "--user")) {
>- VLOG_ERR("Can not mix --dpdk and --user options, aborting.");
>+ if (lookup) {
>+ dpdk_option_extend(argv, ret, opts[i].dpdk_option, lookup);
>+ ret += 2;
> }
> }
>
>+ return ret;
>+}
>+
>+#define MAX_DPDK_EXCL_OPTS 10
>+
>+static int
>+construct_dpdk_mutex_options(const struct ovsrec_open_vswitch *ovs_cfg,
>+ char ***argv, const int initial_size)
>+{
>+ struct dpdk_exclusive_options_map {
>+ const char *category;
>+ const char *ovs_dpdk_options[MAX_DPDK_EXCL_OPTS];
>+ const char *eal_dpdk_options[MAX_DPDK_EXCL_OPTS];
>+ const char *default_value;
>+ int default_option;
>+ } excl_opts[] = {
>+ {"memory type",
>+ {"dpdk-alloc-mem", "dpdk-socket-mem", NULL,},
>+ {"-m", "--socket-mem", NULL,},
>+ "1024,0", 1
>+ },
>+ };
>+
>+ int i, ret = initial_size;
>+ for (i = 0; i < ARRAY_SIZE(excl_opts); ++i) {
>+ int found_opts = 0, scan, found_pos = -1;
>+ const char *found_value;
>+ struct dpdk_exclusive_options_map *popt = &excl_opts[i];
>+
>+ for (scan = 0; scan < MAX_DPDK_EXCL_OPTS
>+ && popt->ovs_dpdk_options[scan]; ++scan) {
>+ const char *lookup = smap_get(&ovs_cfg->other_config,
>+ popt->ovs_dpdk_options[scan]);
>+ if (lookup && strlen(lookup)) {
>+ found_opts++;
>+ found_pos = scan;
>+ found_value = lookup;
>+ }
>+ }
>+
>+ if (!found_opts) {
>+ if (popt->default_option) {
>+ found_pos = popt->default_option;
>+ found_value = popt->default_value;
>+ } else {
>+ continue;
>+ }
>+ }
>+
>+ if (found_opts > 1) {
>+ VLOG_ERR("Multiple defined options for %s. Please check your"
>+ " database settings and reconfigure if necessary.",
>+ popt->category);
>+ }
>+
>+ dpdk_option_extend(argv, ret, popt->eal_dpdk_options[found_pos],
>+ found_value);
>+ ret += 2;
>+ }
>+
>+ return ret;
>+}
>+
>+static int
>+get_dpdk_args(const struct ovsrec_open_vswitch *ovs_cfg, char ***argv)
>+{
>+ int i = construct_dpdk_options(ovs_cfg, argv, 1);
>+ i = construct_dpdk_mutex_options(ovs_cfg, argv, i);
>+ return i;
>+}
>+
>+static char **dpdk_argv;
>+static int dpdk_argc;
>+
>+static void
>+deferred_argv_release(void)
>+{
>+ int result;
>+ for (result = 0; result < dpdk_argc; ++result) {
>+ free(dpdk_argv[result]);
>+ }
>+
>+ free(dpdk_argv);
>+}
>+
>+static void
>+dpdk_init__(const struct ovsrec_open_vswitch *ovs_cfg)
>+{
>+ char **argv = NULL;
>+ int result;
>+ int argc;
>+ int err;
>+ cpu_set_t cpuset;
>+
>+ if (!smap_get_bool(&ovs_cfg->other_config, "dpdk-init", false)) {
>+ VLOG_INFO("DPDK Disabled - to change this requires a restart.\n");
>+ return;
>+ }
>+
>+ VLOG_INFO("DPDK Enabled, initializing");
>+
> #ifdef VHOST_CUSE
>- if (process_vhost_flags("-cuse_dev_name", xstrdup("vhost-net"),
>- PATH_MAX, argv, &cuse_dev_name)) {
>+ if (process_vhost_flags("cuse-dev-name", xstrdup("vhost-net"),
>+ PATH_MAX, ovs_cfg, &cuse_dev_name)) {
> #else
>- if (process_vhost_flags("-vhost_sock_dir", xstrdup(ovs_rundir()),
>- NAME_MAX, argv, &vhost_sock_dir)) {
>+ if (process_vhost_flags("vhost-sock-dir", xstrdup(ovs_rundir()),
>+ NAME_MAX, ovs_cfg, &vhost_sock_dir)) {
> struct stat s;
>- int err;
>
> err = stat(vhost_sock_dir, &s);
> if (err) {
>- VLOG_ERR("vHostUser socket DIR '%s' does not exist.",
>- vhost_sock_dir);
>- return err;
>+ VLOG_ERR("vhost-user sock directory '%s' does not exist.",
>+ vhost_sock_dir);
> }
> #endif
>- /* Remove the vhost flag configuration parameters from the argument
>- * list, so that the correct elements are passed to the DPDK
>- * initialization function
>- */
>- argc -= 2;
>- argv += 2; /* Increment by two to bypass the vhost flag arguments
>*/
>- base = 2;
> }
>
> /* Get the main thread affinity */
> CPU_ZERO(&cpuset);
>- err = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
>+ err = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t),
>+ &cpuset);
> if (err) {
> VLOG_ERR("Thread getaffinity error %d.", err);
>- return err;
> }
>
>- /* Keep the program name argument as this is needed for call to
>- * rte_eal_init()
>- */
>- argv[0] = pragram_name;
>+ argv = grow_argv(&argv, 0, 1);
>+ argv[0] = xstrdup(ovs_get_program_name());
>+ argc = get_dpdk_args(ovs_cfg, &argv);
>+
>+ argv = grow_argv(&argv, argc, 1);
>+ argv[argc] = 0;
sparse complains about this: we should use NULL instead of 0
>+
>+ optind = 1;
>
> /* Make sure things are initialized ... */
> result = rte_eal_init(argc, argv);
>@@ -2827,23 +2945,51 @@ dpdk_init(int argc, char **argv)
> }
>
> /* Set the main thread affinity back to pre rte_eal_init() value */
>- err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
>- if (err) {
>- VLOG_ERR("Thread setaffinity error %d", err);
>- return err;
>+ if (!err) {
>+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t),
>+ &cpuset);
>+ if (err) {
>+ VLOG_ERR("Thread setaffinity error %d", err);
>+ }
> }
>
>+ dpdk_argv = argv;
>+ dpdk_argc = argc;
>+
>+ atexit(deferred_argv_release);
>+
> rte_memzone_dump(stdout);
> rte_eal_init_ret = 0;
>
>- if (argc > result) {
>- argv[result] = argv[0];
>- }
>-
> /* We are called from the main thread here */
> RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
>
>- return result + 1 + base;
>+ ovs_thread_create("dpdk_watchdog", dpdk_watchdog, NULL);
>+
>+#ifdef VHOST_CUSE
>+ /* Register CUSE device to handle IOCTLs.
>+ * Unless otherwise specified, cuse_dev_name is set to vhost-net.
>+ */
>+ err = rte_vhost_driver_register(cuse_dev_name);
>+
>+ if (err != 0) {
>+ VLOG_ERR("CUSE device setup failure.");
>+ return;
>+ }
>+#endif
>+
>+ dpdk_vhost_class_init();
>+}
>+
>+void
>+dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg)
>+{
>+ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
>+
>+ if (ovs_cfg && ovsthread_once_start(&once)) {
>+ dpdk_init__(ovs_cfg);
>+ ovsthread_once_done(&once);
>+ }
> }
>
> static const struct netdev_class dpdk_class =
>@@ -2907,10 +3053,6 @@ netdev_dpdk_register(void)
> {
> static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
>
>- if (rte_eal_init_ret) {
>- return;
>- }
>-
> if (ovsthread_once_start(&once)) {
> dpdk_common_init();
> netdev_register_provider(&dpdk_class);
>diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
>index 646d3e2..143a609 100644
>--- a/lib/netdev-dpdk.h
>+++ b/lib/netdev-dpdk.h
>@@ -4,6 +4,7 @@
> #include <config.h>
>
> struct dp_packet;
>+struct ovsrec_open_vswitch;
>
> #ifdef DPDK_NETDEV
>
>@@ -22,7 +23,6 @@ struct dp_packet;
>
> #define NON_PMD_CORE_ID LCORE_ID_ANY
>
>-int dpdk_init(int argc, char **argv);
> void netdev_dpdk_register(void);
> void free_dpdk_buf(struct dp_packet *);
> int pmd_thread_setaffinity_cpu(unsigned cpu);
>@@ -33,15 +33,6 @@ int pmd_thread_setaffinity_cpu(unsigned cpu);
>
> #include "util.h"
>
>-static inline int
>-dpdk_init(int argc, char **argv)
>-{
>- if (argc >= 2 && !strcmp(argv[1], "--dpdk")) {
>- ovs_fatal(0, "DPDK support not built into this copy of Open
>vSwitch.");
>- }
>- return 0;
>-}
>-
> static inline void
> netdev_dpdk_register(void)
> {
>@@ -61,4 +52,7 @@ pmd_thread_setaffinity_cpu(unsigned cpu OVS_UNUSED)
> }
>
> #endif /* DPDK_NETDEV */
>+
>+void dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg);
>+
> #endif
>diff --git a/lib/netdev-nodpdk.c b/lib/netdev-nodpdk.c
>new file mode 100644
>index 0000000..28e637a
>--- /dev/null
>+++ b/lib/netdev-nodpdk.c
>@@ -0,0 +1,21 @@
>+#include <config.h>
>+#include "netdev-dpdk.h"
>+#include "smap.h"
>+#include "ovs-thread.h"
>+#include "openvswitch/vlog.h"
>+#include "vswitch-idl.h"
>+
>+VLOG_DEFINE_THIS_MODULE(dpdk);
>+
>+void
>+dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg OVS_UNUSED)
This is used now
>+{
>+ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
>+
>+ if (ovs_cfg && ovsthread_once_start(&once)) {
>+ if (smap_get_bool(&ovs_cfg->other_config, "dpdk-init", false)) {
>+ VLOG_ERR("DPDK not supported in this copy of Open vSwitch.");
>+ }
>+ ovsthread_once_done(&once);
>+ }
>+}
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev