This patch introduces multi-threading for ovn-controller and use
dedicated thread for packet-in processing as a start. It decouples
packet-in processing and ovs flow computing, so that packet-in inputs
won't trigger flow recomputing, and flow computing won't block
packet-in processing. In large scale environment this largely reduces
CPU cost and improves performance.

Related effort and discussion:
https://mail.openvswitch.org/pipermail/ovs-dev/2017-May/331813.html

Signed-off-by: Han Zhou <zhou...@gmail.com>
---
v2->v3:
- use binding_get() instead of binding_run() in pinctrl thread.
- waiting on input for sharing ovsdb sb idl, problem and proposals here:
    https://mail.openvswitch.org/pipermail/ovs-dev/2017-June/333927.html

 ovn/controller/ovn-controller.c | 43 ++++++++++++------
 ovn/controller/ovn-controller.h | 33 ++++++++++++++
 ovn/controller/pinctrl.c        | 96 +++++++++++++++++++++++++++++++++++++++++
 ovn/controller/pinctrl.h        |  1 +
 4 files changed, 161 insertions(+), 12 deletions(-)

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index cc01fe9..2885ac0 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -54,6 +54,8 @@
 #include "stream.h"
 #include "unixctl.h"
 #include "util.h"
+#include "latch.h"
+#include "ovs-thread.h"
 
 VLOG_DEFINE_THIS_MODULE(main);
 
@@ -64,8 +66,6 @@ static unixctl_cb_func inject_pkt;
 #define DEFAULT_BRIDGE_NAME "br-int"
 #define DEFAULT_PROBE_INTERVAL_MSEC 5000
 
-static void update_probe_interval(struct controller_ctx *,
-                                  const char *ovnsb_remote);
 static void parse_options(int argc, char *argv[]);
 OVS_NO_RETURN static void usage(void);
 
@@ -76,7 +76,7 @@ struct pending_pkt {
     char *flow_s;
 };
 
-static char *ovs_remote;
+char *ovs_remote;
 
 struct local_datapath *
 get_local_datapath(const struct hmap *local_datapaths, uint32_t tunnel_key)
@@ -127,7 +127,7 @@ get_bridge(struct ovsdb_idl *ovs_idl, const char *br_name)
     return NULL;
 }
 
-static void
+void
 update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
                    const struct sbrec_chassis *chassis,
                    const struct sset *local_ifaces,
@@ -254,7 +254,7 @@ create_br_int(struct controller_ctx *ctx)
     return bridge;
 }
 
-static const struct ovsrec_bridge *
+const struct ovsrec_bridge *
 get_br_int(struct controller_ctx *ctx)
 {
     const struct ovsrec_open_vswitch *cfg;
@@ -266,7 +266,7 @@ get_br_int(struct controller_ctx *ctx)
     return get_bridge(ctx->ovs_idl, br_int_name(cfg));
 }
 
-static const char *
+const char *
 get_chassis_id(const struct ovsdb_idl *ovs_idl)
 {
     const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(ovs_idl);
@@ -306,7 +306,7 @@ update_ssl_config(const struct ovsdb_idl *ovs_idl)
 
 /* Retrieves the OVN Southbound remote location from the
  * "external-ids:ovn-remote" key in 'ovs_idl' and returns a copy of it. */
-static char *
+char *
 get_ovnsb_remote(struct ovsdb_idl *ovs_idl)
 {
     while (1) {
@@ -495,6 +495,22 @@ get_nb_cfg(struct ovsdb_idl *idl)
 }
 
 static void
+ctrl_thread_create(struct ctrl_thread *thread, const char *name,
+    void *(*start)(void *))
+{
+    latch_init(&thread->exit_latch);
+    thread->thread = ovs_thread_create(name, start, thread);
+}
+
+static void
+ctrl_thread_exit(struct ctrl_thread *thread)
+{
+    latch_set(&thread->exit_latch);
+    xpthread_join(thread->thread, NULL);
+    latch_destroy(&thread->exit_latch);
+}
+
+void
 ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
 {
     /* We do not monitor all tables by default, so modules must register
@@ -559,7 +575,6 @@ main(int argc, char *argv[])
     daemonize_complete();
 
     ofctrl_init(&group_table);
-    pinctrl_init();
     lflow_init();
 
     /* Connect to OVS OVSDB instance. */
@@ -590,6 +605,10 @@ main(int argc, char *argv[])
     unixctl_command_register("inject-pkt", "MICROFLOW", 1, 1, inject_pkt,
                              &pending_pkt);
 
+
+    struct ctrl_thread pinctrl_thread;
+    ctrl_thread_create(&pinctrl_thread, "pinctrl", pinctrl_thread_main);
+
     /* Main loop. */
     exiting = false;
     while (!exiting) {
@@ -654,7 +673,6 @@ main(int argc, char *argv[])
             enum mf_field_id mff_ovn_geneve = ofctrl_run(br_int,
                                                          &pending_ct_zones);
 
-            pinctrl_run(&ctx, &lports, br_int, chassis, &local_datapaths);
             update_ct_zones(&local_lports, &local_datapaths, &ct_zones,
                             ct_zone_bitmap, &pending_ct_zones);
             if (ctx.ovs_idl_txn) {
@@ -734,7 +752,6 @@ main(int argc, char *argv[])
 
         if (br_int) {
             ofctrl_wait();
-            pinctrl_wait(&ctx);
         }
         ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
 
@@ -783,10 +800,12 @@ main(int argc, char *argv[])
         poll_block();
     }
 
+    /* stop child controller threads */
+    ctrl_thread_exit(&pinctrl_thread);
+
     unixctl_server_destroy(unixctl);
     lflow_destroy();
     ofctrl_destroy();
-    pinctrl_destroy();
 
     simap_destroy(&ct_zones);
 
@@ -943,7 +962,7 @@ inject_pkt(struct unixctl_conn *conn, int argc OVS_UNUSED,
 
 /* Get the desired SB probe timer from the OVS database and configure it into
  * the SB database. */
-static void
+void
 update_probe_interval(struct controller_ctx *ctx, const char *ovnsb_remote)
 {
     const struct ovsrec_open_vswitch *cfg
diff --git a/ovn/controller/ovn-controller.h b/ovn/controller/ovn-controller.h
index 4bc0467..7314cef 100644
--- a/ovn/controller/ovn-controller.h
+++ b/ovn/controller/ovn-controller.h
@@ -18,7 +18,9 @@
 #define OVN_CONTROLLER_H 1
 
 #include "simap.h"
+#include "sset.h"
 #include "ovn/lib/ovn-sb-idl.h"
+#include "latch.h"
 
 /* Linux supports a maximum of 64K zones, which seems like a fine default. */
 #define MAX_CT_ZONES 65535
@@ -68,6 +70,13 @@ struct local_datapath {
     bool has_local_l3gateway;
 };
 
+struct ctrl_thread {
+    pthread_t thread;
+
+    /* Controls thread exit. */
+    struct latch exit_latch;
+};
+
 struct local_datapath *get_local_datapath(const struct hmap *,
                                           uint32_t tunnel_key);
 
@@ -87,5 +96,29 @@ enum chassis_tunnel_type {
 
 uint32_t get_tunnel_type(const char *name);
 
+/* Retrieves the OVN Southbound remote location from the
+ * "external-ids:ovn-remote" key in 'ovs_idl' and returns a copy of it. */
+char *get_ovnsb_remote(struct ovsdb_idl *ovs_idl);
+
+void
+update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
+                   const struct sbrec_chassis *chassis,
+                   const struct sset *local_ifaces,
+                   struct hmap *local_datapaths);
+
+/* Get the desired SB probe timer from the OVS database and configure it into
+ * the SB database. */
+void
+update_probe_interval(struct controller_ctx *ctx, const char *ovnsb_remote);
+
+const struct ovsrec_bridge *
+get_br_int(struct controller_ctx *ctx);
+
+const char *
+get_chassis_id(const struct ovsdb_idl *ovs_idl);
+
+void
+ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl);
 
+extern char *ovs_remote;
 #endif /* ovn/ovn-controller.h */
diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
index 660233a..c9806a7 100644
--- a/ovn/controller/pinctrl.c
+++ b/ovn/controller/pinctrl.c
@@ -46,6 +46,8 @@
 #include "socket-util.h"
 #include "timeval.h"
 #include "vswitch-idl.h"
+#include "latch.h"
+#include "binding.h"
 
 VLOG_DEFINE_THIS_MODULE(pinctrl);
 
@@ -81,6 +83,100 @@ static void reload_metadata(struct ofpbuf *ofpacts,
 
 COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
 
+void *
+pinctrl_thread_main(void *arg)
+{
+    struct ctrl_thread *thread = arg;
+    pinctrl_init();
+
+    struct ovsdb_idl_loop ovs_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
+        ovsdb_idl_create(ovs_remote, &ovsrec_idl_class, false, true));
+    ctrl_register_ovs_idl(ovs_idl_loop.idl);
+    ovsdb_idl_get_initial_snapshot(ovs_idl_loop.idl);
+
+    char *ovnsb_remote = get_ovnsb_remote(ovs_idl_loop.idl);
+    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
+        ovsdb_idl_create(ovnsb_remote, &sbrec_idl_class, true, true));
+    ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
+    update_sb_monitors(ovnsb_idl_loop.idl, NULL, NULL, NULL);
+    ovsdb_idl_get_initial_snapshot(ovnsb_idl_loop.idl);
+
+    while (!latch_is_set(&thread->exit_latch)) {
+        /* Below logic is similar as in main loop in ovn-controller.c,
+         * while the purpose here is packet-in processing only */
+        char *new_ovnsb_remote = get_ovnsb_remote(ovs_idl_loop.idl);
+        if (strcmp(ovnsb_remote, new_ovnsb_remote)) {
+            free(ovnsb_remote);
+            ovnsb_remote = new_ovnsb_remote;
+            ovsdb_idl_set_remote(ovnsb_idl_loop.idl, ovnsb_remote, true);
+        } else {
+            free(new_ovnsb_remote);
+        }
+
+        struct controller_ctx ctx = {
+            .ovs_idl = ovs_idl_loop.idl,
+            .ovs_idl_txn = ovsdb_idl_loop_run(&ovs_idl_loop),
+            .ovnsb_idl = ovnsb_idl_loop.idl,
+            .ovnsb_idl_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
+        };
+
+        update_probe_interval(&ctx, ovnsb_remote);
+
+        struct hmap local_datapaths = HMAP_INITIALIZER(&local_datapaths);
+        struct sset local_lports = SSET_INITIALIZER(&local_lports);
+        const struct ovsrec_bridge *br_int = get_br_int(&ctx);
+        const char *chassis_id = get_chassis_id(ctx.ovs_idl);
+        struct ldatapath_index ldatapaths;
+        struct lport_index lports;
+        ldatapath_index_init(&ldatapaths, ctx.ovnsb_idl);
+        lport_index_init(&lports, ctx.ovnsb_idl);
+
+        const struct sbrec_chassis *chassis = NULL;
+        if (chassis_id) {
+            chassis = get_chassis(ctx.ovnsb_idl, chassis_id);
+            binding_get(&ctx, br_int, chassis, &ldatapaths, &lports,
+                        &local_datapaths, &local_lports);
+        }
+
+        if (br_int && chassis) {
+            pinctrl_run(&ctx, &lports, br_int, chassis, &local_datapaths);
+            update_sb_monitors(ctx.ovnsb_idl, chassis,
+                               &local_lports, &local_datapaths);
+        }
+
+        lport_index_destroy(&lports);
+        ldatapath_index_destroy(&ldatapaths);
+
+        sset_destroy(&local_lports);
+
+        struct local_datapath *cur_node, *next_node;
+        HMAP_FOR_EACH_SAFE (cur_node, next_node, hmap_node, &local_datapaths) {
+            hmap_remove(&local_datapaths, &cur_node->hmap_node);
+            free(cur_node);
+        }
+        hmap_destroy(&local_datapaths);
+
+        if (br_int) {
+            pinctrl_wait(&ctx);
+        }
+        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
+        ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop);
+
+        latch_wait(&thread->exit_latch);
+        poll_block();
+    }
+
+    pinctrl_destroy();
+
+    ovsdb_idl_loop_destroy(&ovs_idl_loop);
+    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
+
+    free(ovnsb_remote);
+
+    VLOG_INFO("pinctrl thread done");
+    return NULL;
+}
+
 void
 pinctrl_init(void)
 {
diff --git a/ovn/controller/pinctrl.h b/ovn/controller/pinctrl.h
index 230580b..573b536 100644
--- a/ovn/controller/pinctrl.h
+++ b/ovn/controller/pinctrl.h
@@ -31,6 +31,7 @@ void pinctrl_init(void);
 void pinctrl_run(struct controller_ctx *, const struct lport_index *,
                  const struct ovsrec_bridge *, const struct sbrec_chassis *,
                  struct hmap *local_datapaths);
+void *pinctrl_thread_main(void *arg);
 void pinctrl_wait(struct controller_ctx *);
 void pinctrl_destroy(void);
 
-- 
2.1.0

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to