Daniele Di Proietto <diproiet...@vmware.com> writes: > Hi Aaron, > > thanks (again!) for this patch. Few comments: > > * As I mentioned on my previous round of review, I don't think it's necessary > to pass the whole Open_vSwitch table to dpdk_init(). I.e., instead of doing > > dpdk_init(&cfg); > > I'd prefer > > if (cfg) { > dpdk_init(&cfg->other_config); > } > > This way we don't have to include "vswitch-idl.h". > > > * I suggested 'dpdk-mem-channels', because I think people who want to use > that will be comfortable passing '-n' in dpdk-extra. What do you think? Is > there any reason why you think it's worth keeping?
D'oh! I had done both of these changes, but they were dropped during the rebase. I'll cook a fix asap. > * Sorry for not noticing this before: it seems that netdev_dpdk_register() > now always registers the netdev classes, even though they cannot be created. > The registered classes end up in the database in the iface_type column of the > Open_vSwitch table, so controllers might think that they're available. I > think we should register the classes only when DPDK is initialized. I had an issue doing this, back when the I had the lazy initialization. I don't remember the details, though. I'll try it again, and see what happens. > Two minor nits inline, > > Thanks Thanks so much for the review, Daniele! -Aaron > On 26/04/2016 12:42, "Aaron Conole" <acon...@redhat.com> wrote: > >>Existing DPDK integration is provided by use of command line options which >>must be split out and passed to librte in a special manner. However, this >>forces any configuration to be passed by way of a special DPDK flag, and >>interferes with ovs+dpdk packaging solutions. >> >>This commit delays dpdk initialization until after the OVS database >>connection is established, at which point ovs initializes librte. It >>pulls all of the config data from the OVS database, and assembles a >>new argv/argc pair to be passed along. >> >>Signed-off-by: Aaron Conole <acon...@redhat.com> >>Acked-by: Kevin Traynor <kevin.tray...@intel.com> >>--- >>Previous: http://openvswitch.org/pipermail/dev/2016-April/069028.html >> >>v12: >>* Squashed NEWS change into this commit >> >> FAQ.md | 6 +- >> INSTALL.DPDK.md | 82 +++++++++--- >> NEWS | 5 + >> lib/automake.mk | 4 + >> lib/netdev-dpdk.c | 304 >> +++++++++++++++++++++++++++++++++------------ >> lib/netdev-dpdk.h | 14 +-- >> lib/netdev-nodpdk.c | 21 ++++ >> tests/ofproto-macros.at | 3 +- >> utilities/ovs-dev.py | 13 +- >> vswitchd/bridge.c | 3 + >> vswitchd/ovs-vswitchd.8.in | 6 +- >> vswitchd/ovs-vswitchd.c | 27 +--- >> vswitchd/vswitch.xml | 120 ++++++++++++++++++ >> 13 files changed, 463 insertions(+), 145 deletions(-) >> create mode 100644 lib/netdev-nodpdk.c >> > > [...] > >> >>@@ -2732,22 +2734,20 @@ static const struct dpdk_qos_ops egress_policer_ops = >>{ >> >> static int >> process_vhost_flags(char *flag, char *default_val, int size, >>- char **argv, char **new_val) >>+ const struct ovsrec_open_vswitch *ovs_cfg, >>+ char **new_val) >> { >>+ const char *val; >> int changed = 0; >> >>+ val = smap_get(&ovs_cfg->other_config, flag); >>+ >> /* Depending on which version of vhost is in use, process the >> vhost-specific >>- * flag if it is provided on the vswitchd command line, otherwise resort >>to >>- * a default value. >>- * >>- * For vhost-user: Process "-vhost_sock_dir" to set the custom location >>of >>- * the vhost-user socket(s). >>- * For vhost-cuse: Process "-cuse_dev_name" to set the custom name of the >>- * vhost-cuse character device. >>+ * flag if it is provided, otherwise resort to default value. >> */ >>- if (!strcmp(argv[1], flag) && (strlen(argv[2]) <= size)) { >>+ if (val && (strlen(val) <= size)) { >> changed = 1; >>- *new_val = xstrdup(argv[2]); >>+ *new_val = xstrdup(val); >> VLOG_INFO("User-provided %s in use: %s", flag, *new_val); >> } else { >> VLOG_INFO("No %s provided - defaulting to %s", flag, default_val); >>@@ -2757,68 +2757,186 @@ process_vhost_flags(char *flag, char *default_val, >>int size, >> return changed; >> } >> >>-int >>-dpdk_init(int argc, char **argv) >>+static char ** >>+grow_argv(char ***argv, size_t cur_siz, size_t grow_by) >> { >>- int result; >>- int base = 0; >>- char *pragram_name = argv[0]; >>- int err; >>- int isset; >>- cpu_set_t cpuset; >>+ return xrealloc(*argv, sizeof(char *) * (cur_siz + grow_by)); >>+} >> >>- if (argc < 2 || strcmp(argv[1], "--dpdk")) >>- return 0; >>+static void >>+dpdk_option_extend(char ***argv, int argc, const char *option, >>+ const char *value) >>+{ >>+ char **newargv = grow_argv(argv, argc, 2); >>+ *argv = newargv; >>+ newargv[argc] = xstrdup(option); >>+ newargv[argc+1] = xstrdup(value); >>+} >> >>- /* Remove the --dpdk argument from arg list.*/ >>- argc--; >>- argv++; >>+static int >>+construct_dpdk_options(const struct ovsrec_open_vswitch *ovs_cfg, >>+ char ***argv, const int initial_size) >>+{ >>+ struct dpdk_options_map { >>+ const char *ovs_configuration; >>+ const char *dpdk_option; >>+ bool default_enabled; >>+ const char *default_value; >>+ } opts[] = { >>+ {"dpdk-lcore-mask", "-c", false, NULL}, >>+ {"dpdk-mem-channels", "-n", false, NULL}, >>+ {"dpdk-hugepage-dir", "--huge-dir", false, NULL}, >>+ }; >>+ >>+ int i, ret = initial_size; >>+ >>+ /*First, construct from the flat-options (non-mutex)*/ >>+ for (i = 0; i < ARRAY_SIZE(opts); ++i) { >>+ const char *lookup = smap_get(&ovs_cfg->other_config, >>+ opts[i].ovs_configuration); >>+ if (!lookup && opts[i].default_enabled) { >>+ lookup = opts[i].default_value; >>+ } >> >>- /* Reject --user option */ >>- int i; >>- for (i = 0; i < argc; i++) { >>- if (!strcmp(argv[i], "--user")) { >>- VLOG_ERR("Can not mix --dpdk and --user options, aborting."); >>+ if (lookup) { >>+ dpdk_option_extend(argv, ret, opts[i].dpdk_option, lookup); >>+ ret += 2; >> } >> } >> >>+ return ret; >>+} >>+ >>+#define MAX_DPDK_EXCL_OPTS 10 >>+ >>+static int >>+construct_dpdk_mutex_options(const struct ovsrec_open_vswitch *ovs_cfg, >>+ char ***argv, const int initial_size) >>+{ >>+ struct dpdk_exclusive_options_map { >>+ const char *category; >>+ const char *ovs_dpdk_options[MAX_DPDK_EXCL_OPTS]; >>+ const char *eal_dpdk_options[MAX_DPDK_EXCL_OPTS]; >>+ const char *default_value; >>+ int default_option; >>+ } excl_opts[] = { >>+ {"memory type", >>+ {"dpdk-alloc-mem", "dpdk-socket-mem", NULL,}, >>+ {"-m", "--socket-mem", NULL,}, >>+ "1024,0", 1 >>+ }, >>+ }; >>+ >>+ int i, ret = initial_size; >>+ for (i = 0; i < ARRAY_SIZE(excl_opts); ++i) { >>+ int found_opts = 0, scan, found_pos = -1; >>+ const char *found_value; >>+ struct dpdk_exclusive_options_map *popt = &excl_opts[i]; >>+ >>+ for (scan = 0; scan < MAX_DPDK_EXCL_OPTS >>+ && popt->ovs_dpdk_options[scan]; ++scan) { >>+ const char *lookup = smap_get(&ovs_cfg->other_config, >>+ popt->ovs_dpdk_options[scan]); >>+ if (lookup && strlen(lookup)) { >>+ found_opts++; >>+ found_pos = scan; >>+ found_value = lookup; >>+ } >>+ } >>+ >>+ if (!found_opts) { >>+ if (popt->default_option) { >>+ found_pos = popt->default_option; >>+ found_value = popt->default_value; >>+ } else { >>+ continue; >>+ } >>+ } >>+ >>+ if (found_opts > 1) { >>+ VLOG_ERR("Multiple defined options for %s. Please check your" >>+ " database settings and reconfigure if necessary.", >>+ popt->category); >>+ } >>+ >>+ dpdk_option_extend(argv, ret, popt->eal_dpdk_options[found_pos], >>+ found_value); >>+ ret += 2; >>+ } >>+ >>+ return ret; >>+} >>+ >>+static int >>+get_dpdk_args(const struct ovsrec_open_vswitch *ovs_cfg, char ***argv) >>+{ >>+ int i = construct_dpdk_options(ovs_cfg, argv, 1); >>+ i = construct_dpdk_mutex_options(ovs_cfg, argv, i); >>+ return i; >>+} >>+ >>+static char **dpdk_argv; >>+static int dpdk_argc; >>+ >>+static void >>+deferred_argv_release(void) >>+{ >>+ int result; >>+ for (result = 0; result < dpdk_argc; ++result) { >>+ free(dpdk_argv[result]); >>+ } >>+ >>+ free(dpdk_argv); >>+} >>+ >>+static void >>+dpdk_init__(const struct ovsrec_open_vswitch *ovs_cfg) >>+{ >>+ char **argv = NULL; >>+ int result; >>+ int argc; >>+ int err; >>+ cpu_set_t cpuset; >>+ >>+ if (!smap_get_bool(&ovs_cfg->other_config, "dpdk-init", false)) { >>+ VLOG_INFO("DPDK Disabled - to change this requires a restart.\n"); >>+ return; >>+ } >>+ >>+ VLOG_INFO("DPDK Enabled, initializing"); >>+ >> #ifdef VHOST_CUSE >>- if (process_vhost_flags("-cuse_dev_name", xstrdup("vhost-net"), >>- PATH_MAX, argv, &cuse_dev_name)) { >>+ if (process_vhost_flags("cuse-dev-name", xstrdup("vhost-net"), >>+ PATH_MAX, ovs_cfg, &cuse_dev_name)) { >> #else >>- if (process_vhost_flags("-vhost_sock_dir", xstrdup(ovs_rundir()), >>- NAME_MAX, argv, &vhost_sock_dir)) { >>+ if (process_vhost_flags("vhost-sock-dir", xstrdup(ovs_rundir()), >>+ NAME_MAX, ovs_cfg, &vhost_sock_dir)) { >> struct stat s; >>- int err; >> >> err = stat(vhost_sock_dir, &s); >> if (err) { >>- VLOG_ERR("vHostUser socket DIR '%s' does not exist.", >>- vhost_sock_dir); >>- return err; >>+ VLOG_ERR("vhost-user sock directory '%s' does not exist.", >>+ vhost_sock_dir); >> } >> #endif >>- /* Remove the vhost flag configuration parameters from the argument >>- * list, so that the correct elements are passed to the DPDK >>- * initialization function >>- */ >>- argc -= 2; >>- argv += 2; /* Increment by two to bypass the vhost flag arguments >>*/ >>- base = 2; >> } >> >> /* Get the main thread affinity */ >> CPU_ZERO(&cpuset); >>- err = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); >>+ err = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), >>+ &cpuset); >> if (err) { >> VLOG_ERR("Thread getaffinity error %d.", err); >>- return err; >> } >> >>- /* Keep the program name argument as this is needed for call to >>- * rte_eal_init() >>- */ >>- argv[0] = pragram_name; >>+ argv = grow_argv(&argv, 0, 1); >>+ argv[0] = xstrdup(ovs_get_program_name()); >>+ argc = get_dpdk_args(ovs_cfg, &argv); >>+ >>+ argv = grow_argv(&argv, argc, 1); >>+ argv[argc] = 0; > > sparse complains about this: we should use NULL instead of 0 sorry, I'll fix it. >>+ >>+ optind = 1; >> >> /* Make sure things are initialized ... */ >> result = rte_eal_init(argc, argv); >>@@ -2827,23 +2945,51 @@ dpdk_init(int argc, char **argv) >> } >> >> /* Set the main thread affinity back to pre rte_eal_init() value */ >>- err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); >>- if (err) { >>- VLOG_ERR("Thread setaffinity error %d", err); >>- return err; >>+ if (!err) { >>+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), >>+ &cpuset); >>+ if (err) { >>+ VLOG_ERR("Thread setaffinity error %d", err); >>+ } >> } >> >>+ dpdk_argv = argv; >>+ dpdk_argc = argc; >>+ >>+ atexit(deferred_argv_release); >>+ >> rte_memzone_dump(stdout); >> rte_eal_init_ret = 0; >> >>- if (argc > result) { >>- argv[result] = argv[0]; >>- } >>- >> /* We are called from the main thread here */ >> RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID; >> >>- return result + 1 + base; >>+ ovs_thread_create("dpdk_watchdog", dpdk_watchdog, NULL); >>+ >>+#ifdef VHOST_CUSE >>+ /* Register CUSE device to handle IOCTLs. >>+ * Unless otherwise specified, cuse_dev_name is set to vhost-net. >>+ */ >>+ err = rte_vhost_driver_register(cuse_dev_name); >>+ >>+ if (err != 0) { >>+ VLOG_ERR("CUSE device setup failure."); >>+ return; >>+ } >>+#endif >>+ >>+ dpdk_vhost_class_init(); >>+} >>+ >>+void >>+dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg) >>+{ >>+ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; >>+ >>+ if (ovs_cfg && ovsthread_once_start(&once)) { >>+ dpdk_init__(ovs_cfg); >>+ ovsthread_once_done(&once); >>+ } >> } >> >> static const struct netdev_class dpdk_class = >>@@ -2907,10 +3053,6 @@ netdev_dpdk_register(void) >> { >> static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; >> >>- if (rte_eal_init_ret) { >>- return; >>- } >>- >> if (ovsthread_once_start(&once)) { >> dpdk_common_init(); >> netdev_register_provider(&dpdk_class); >>diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h >>index 646d3e2..143a609 100644 >>--- a/lib/netdev-dpdk.h >>+++ b/lib/netdev-dpdk.h >>@@ -4,6 +4,7 @@ >> #include <config.h> >> >> struct dp_packet; >>+struct ovsrec_open_vswitch; >> >> #ifdef DPDK_NETDEV >> >>@@ -22,7 +23,6 @@ struct dp_packet; >> >> #define NON_PMD_CORE_ID LCORE_ID_ANY >> >>-int dpdk_init(int argc, char **argv); >> void netdev_dpdk_register(void); >> void free_dpdk_buf(struct dp_packet *); >> int pmd_thread_setaffinity_cpu(unsigned cpu); >>@@ -33,15 +33,6 @@ int pmd_thread_setaffinity_cpu(unsigned cpu); >> >> #include "util.h" >> >>-static inline int >>-dpdk_init(int argc, char **argv) >>-{ >>- if (argc >= 2 && !strcmp(argv[1], "--dpdk")) { >>- ovs_fatal(0, "DPDK support not built into this copy of Open >>vSwitch."); >>- } >>- return 0; >>-} >>- >> static inline void >> netdev_dpdk_register(void) >> { >>@@ -61,4 +52,7 @@ pmd_thread_setaffinity_cpu(unsigned cpu OVS_UNUSED) >> } >> >> #endif /* DPDK_NETDEV */ >>+ >>+void dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg); >>+ >> #endif >>diff --git a/lib/netdev-nodpdk.c b/lib/netdev-nodpdk.c >>new file mode 100644 >>index 0000000..28e637a >>--- /dev/null >>+++ b/lib/netdev-nodpdk.c >>@@ -0,0 +1,21 @@ >>+#include <config.h> >>+#include "netdev-dpdk.h" >>+#include "smap.h" >>+#include "ovs-thread.h" >>+#include "openvswitch/vlog.h" >>+#include "vswitch-idl.h" >>+ >>+VLOG_DEFINE_THIS_MODULE(dpdk); >>+ >>+void >>+dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg OVS_UNUSED) > > This is used now Okay, I'll fix. >>+{ >>+ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; >>+ >>+ if (ovs_cfg && ovsthread_once_start(&once)) { >>+ if (smap_get_bool(&ovs_cfg->other_config, "dpdk-init", false)) { >>+ VLOG_ERR("DPDK not supported in this copy of Open vSwitch."); >>+ } >>+ ovsthread_once_done(&once); >>+ } >>+} _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev