From: Anton Ivanov <anton.iva...@cambridgegreys.com>

Restore parallel build with dp groups, using an rwlock instead
of per-row locking as the underlying mechanism.

This provides a ~10% end-to-end improvement on ovn-heater
under virtualization, despite awakening a qemu gremlin
which drives qemu to excessive CPU usage. The gain on
bare metal is likely to be higher.
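
The locking pattern is roughly the following. This is a simplified
sketch rather than code from the patch: lookup_flow() and
insert_flow() are placeholders for ovn_lflow_find() and the unlocked
do_ovn_lflow_add().

/* Sketch of the fast/slow path used when dp_groups and parallel
 * build are both enabled.  lookup_flow()/insert_flow() are
 * placeholders, not real northd functions. */
static struct ovn_lflow *
add_flow_sketch(struct hmap *table, struct ovn_datapath *od)
{
    struct ovn_lflow *flow;

    /* Fast path: a shared (rd) lock on the table is enough to extend
     * the od_group of an existing flow; the per-flow mutex serialises
     * concurrent hmapx_add() calls on that flow. */
    ovs_rwlock_rdlock(&flowtable_lock);
    flow = lookup_flow(table);
    if (flow) {
        ovs_mutex_lock(&flow->odg_lock);
        hmapx_add(&flow->od_group, od);
        ovs_mutex_unlock(&flow->odg_lock);
    }
    ovs_rwlock_unlock(&flowtable_lock);
    if (flow) {
        return flow;
    }

    /* Slow path: the flow must be inserted, which needs exclusive
     * access.  POSIX rwlocks cannot be upgraded, so the rd lock is
     * dropped and a wr lock taken; insert_flow() must re-check for
     * the flow because another thread may have added it while we
     * were waiting for the wr lock. */
    ovs_rwlock_wrlock(&flowtable_lock);
    flow = insert_flow(table, od);
    ovs_rwlock_unlock(&flowtable_lock);
    return flow;
}

Keeping od_group updates under a per-flow mutex rather than the
table-wide wr lock is what allows the vast majority of adds to stay
on the rd path.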

Signed-off-by: Anton Ivanov <anton.iva...@cambridgegreys.com>
---
 northd/ovn-northd.c | 150 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 127 insertions(+), 23 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index ed231510e..34e6ad1a9 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -59,6 +59,7 @@
 #include "unixctl.h"
 #include "util.h"
 #include "uuid.h"
+#include "ovs-thread.h"
 #include "openvswitch/vlog.h"
 
 VLOG_DEFINE_THIS_MODULE(ovn_northd);
@@ -4294,6 +4295,7 @@ struct ovn_lflow {
     struct hmap_node hmap_node;
 
     struct ovn_datapath *od;     /* 'logical_datapath' in SB schema.  */
+    struct ovs_mutex odg_lock;   /* Lock guarding access to od_group */
     struct hmapx od_group;       /* Hash map of 'struct ovn_datapath *'. */
     enum ovn_stage stage;
     uint16_t priority;
@@ -4335,6 +4337,11 @@ ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_datapath *od,
             && !strcmp(a->actions, actions)
             && nullable_string_is_equal(a->ctrl_meter, ctrl_meter));
 }
+/* If this option is 'true' northd will combine logical flows that differ by
+ * logical datapath only by creating a datapath group. */
+static bool use_logical_dp_groups = false;
+static bool use_parallel_build = true;
+
 
 static void
 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
@@ -4353,24 +4360,56 @@ ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
     lflow->ctrl_meter = ctrl_meter;
     lflow->dpg = NULL;
     lflow->where = where;
+    if (use_parallel_build && use_logical_dp_groups) {
+        ovs_mutex_init(&lflow->odg_lock);
+    }
 }
 
-/* If this option is 'true' northd will combine logical flows that differ by
- * logical datapath only by creating a datapath group. */
-static bool use_logical_dp_groups = false;
-static bool use_parallel_build = true;
+/* Adds a row with the specified contents to the Logical_Flow table.
+ * Version to use with dp_groups + parallel - when locking is required.
+ *
+ * Assumptions:
+ *
+ * 1. A large proportion of the operations are lookups (reads).
+ * 2. RW operations are a small proportion of overall adds.
+ * 3. Most RW ops are not flow adds, but changes to the
+ * od groups.
+ *
+ * Principles of operation:
+ * 1. All accesses to the flow table are protected by a rwlock.
+ * 2. By default, everyone grabs a rd lock so that multiple threads
+ * can do lookups simultaneously.
+ * 3. If a change to the lflow is needed, the rd lock is released and
+ * a wr lock is acquired instead (the fact that POSIX does not have an
+ * "upgrade" on locks is a major pain, but there is nothing we can do
+ * - it's not available).
+ * 4. WR lock operations in rd/wr locking have LOWER priority than RD.
+ * That is by design and spec. So the code after a request for WR lock
+ * may wait for a considerable amount of time until it is given a
+ * chance to run. That means that another thread may get there in the
+ * meantime and change the data. Hence all wr operations MUST be coded
+ * to ensure that they are not vulnerable to "someone pulled this from
+ * under my feet": re-reads, checks for presence, etc.
+ * 5. Operations on the actual od_group hash map are protected by
+ * per-flow locks. There is no need for these to be rwlocks; a mutex is
+ * more appropriate. Contention on them is low, as each protects only
+ * its own flow and only during modifications, which happen while
+ * holding a rd lock on the flow table.
+ */
 
-static struct hashrow_locks lflow_locks;
+static struct ovs_rwlock flowtable_lock;
 
 /* Adds a row with the specified contents to the Logical_Flow table.
- * Version to use when locking is required.
+ * Version to use when locking is NOT required.
  */
+
 static struct ovn_lflow *
 do_ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
                  uint32_t hash, enum ovn_stage stage, uint16_t priority,
                  const char *match, const char *actions, const char *io_port,
                  const struct ovsdb_idl_row *stage_hint,
                  const char *where, const char *ctrl_meter)
+                 OVS_NO_THREAD_SAFETY_ANALYSIS
 {
 
     struct ovn_lflow *old_lflow;
@@ -4403,6 +4442,59 @@ do_ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
     return lflow;
 }
 
+/* Adds a row with the specified contents to the Logical_Flow table.
+ * Version to use when locking IS required.
+ */
+
+static struct ovn_lflow *
+do_ovn_lflow_add_pd(struct hmap *lflow_map, struct ovn_datapath *od,
+                    uint32_t hash, enum ovn_stage stage, uint16_t priority,
+                    const char *match, const char *actions,
+                    const char *io_port,
+                    const struct ovsdb_idl_row *stage_hint,
+                    const char *where, const char *ctrl_meter)
+                    OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+
+    struct ovn_lflow *old_lflow;
+    struct ovn_lflow *lflow;
+
+    /* Fast Path - try to amend an existing flow without asking
+     * for WR access to the whole flow table. Locking the actual
+     * hmapx for the particular flow's odg is low overhead as its
+     * contention is much lower.
+     */
+
+    ovs_rwlock_rdlock(&flowtable_lock);
+    old_lflow = ovn_lflow_find(lflow_map, NULL, stage, priority, match,
+                               actions, ctrl_meter, hash);
+    if (old_lflow) {
+        ovs_mutex_lock(&old_lflow->odg_lock);
+        hmapx_add(&old_lflow->od_group, od);
+        ovs_mutex_unlock(&old_lflow->odg_lock);
+    }
+    ovs_rwlock_unlock(&flowtable_lock);
+
+    if (old_lflow) {
+        return old_lflow;
+    }
+
+    ovs_rwlock_wrlock(&flowtable_lock);
+
+    /* We need to rerun the "if in flowtable" steps, because someone
+     * could have inserted it while we were waiting to acquire a
+     * wr lock. As we are now holding the wr lock, nobody else is
+     * in the "fast" portion of the code, which is protected by the
+     * rwlock.
+     */
+    lflow = do_ovn_lflow_add(lflow_map, od, hash, stage, priority, match,
+                             actions, io_port, stage_hint, where,
+                             ctrl_meter);
+    ovs_rwlock_unlock(&flowtable_lock);
+    return lflow;
+}
+
+
 static struct ovn_lflow *
 ovn_lflow_add_at_with_hash(struct hmap *lflow_map, struct ovn_datapath *od,
                            enum ovn_stage stage, uint16_t priority,
@@ -4415,11 +4507,9 @@ ovn_lflow_add_at_with_hash(struct hmap *lflow_map, struct ovn_datapath *od,
 
     ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
     if (use_logical_dp_groups && use_parallel_build) {
-        lock_hash_row(&lflow_locks, hash);
-        lflow = do_ovn_lflow_add(lflow_map, od, hash, stage, priority, match,
-                                 actions, io_port, stage_hint, where,
-                                 ctrl_meter);
-        unlock_hash_row(&lflow_locks, hash);
+        lflow = do_ovn_lflow_add_pd(lflow_map, od, hash, stage, priority,
+                                    match, actions, io_port, stage_hint, where,
+                                    ctrl_meter);
     } else {
         lflow = do_ovn_lflow_add(lflow_map, od, hash, stage, priority, match,
                          actions, io_port, stage_hint, where, ctrl_meter);
@@ -4447,17 +4537,17 @@ ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
 
 static bool
 ovn_dp_group_add_with_reference(struct ovn_lflow *lflow_ref,
-                                struct ovn_datapath *od,
-                                uint32_t hash)
+                                struct ovn_datapath *od)
+                                OVS_NO_THREAD_SAFETY_ANALYSIS
 {
     if (!use_logical_dp_groups || !lflow_ref) {
         return false;
     }
 
-    if (use_parallel_build) {
-        lock_hash_row(&lflow_locks, hash);
+    if (use_parallel_build && use_logical_dp_groups) {
+        ovs_mutex_lock(&lflow_ref->odg_lock);
         hmapx_add(&lflow_ref->od_group, od);
-        unlock_hash_row(&lflow_locks, hash);
+        ovs_mutex_unlock(&lflow_ref->odg_lock);
     } else {
         hmapx_add(&lflow_ref->od_group, od);
     }
@@ -6423,7 +6513,7 @@ build_lb_rules(struct hmap *lflows, struct ovn_northd_lb *lb,
             if (reject) {
                 meter = copp_meter_get(COPP_REJECT, od->nbs->copp,
                                        meter_groups);
-            } else if (ovn_dp_group_add_with_reference(lflow_ref, od, hash)) {
+            } else if (ovn_dp_group_add_with_reference(lflow_ref, od)) {
                 continue;
             }
             lflow_ref = ovn_lflow_add_at_with_hash(lflows, od,
@@ -9476,7 +9566,7 @@ build_lrouter_defrag_flows_for_lb(struct ovn_northd_lb *lb,
                 ds_cstr(match), ds_cstr(&defrag_actions));
         for (size_t j = 0; j < lb->n_nb_lr; j++) {
             struct ovn_datapath *od = lb->nb_lr[j];
-            if (ovn_dp_group_add_with_reference(lflow_ref, od, hash)) {
+            if (ovn_dp_group_add_with_reference(lflow_ref, od)) {
                 continue;
             }
             lflow_ref = ovn_lflow_add_at_with_hash(lflows, od,
@@ -9540,7 +9630,7 @@ build_lflows_for_unreachable_vips(struct ovn_northd_lb *lb,
                 continue;
             }
 
-            if (ovn_dp_group_add_with_reference(lflow_ref, peer->od, hash)) {
+            if (ovn_dp_group_add_with_reference(lflow_ref, peer->od)) {
                 continue;
             }
             lflow_ref = ovn_lflow_add_at_with_hash(lflows, peer->od,
@@ -12974,7 +13064,7 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
         }
     }
 
-    if (use_parallel_build && (!use_logical_dp_groups)) {
+    if (use_parallel_build) {
         struct hmap *lflow_segs;
         struct lswitch_flow_build_info *lsiv;
         int index;
@@ -13156,6 +13246,8 @@ ovn_sb_set_lflow_logical_dp_group(
 }
 
 static ssize_t max_seen_lflow_size = 128;
+static bool needs_parallel_init = true;
+static bool reset_parallel = false;
 
 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
  * constructing their contents based on the OVN_NB database. */
@@ -13169,9 +13261,22 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
 {
     struct hmap lflows;
 
+    if (reset_parallel) {
+        /* Parallel build was disabled on the previous run;
+         * re-enable it. */
+        use_parallel_build = true;
+        reset_parallel = false;
+    }
+
     fast_hmap_size_for(&lflows, max_seen_lflow_size);
-    if (use_parallel_build) {
-        update_hashrow_locks(&lflows, &lflow_locks);
+    if (use_parallel_build && use_logical_dp_groups &&
+        needs_parallel_init) {
+        ovs_rwlock_init(&flowtable_lock);
+        needs_parallel_init = false;
+        /* Disable parallel build on first run with dp_groups
+         * to determine the correct sizing of hashes. */
+        use_parallel_build = false;
+        reset_parallel = true;
     }
     build_lswitch_and_lrouter_flows(datapaths, ports,
                                     port_groups, &lflows, mcgroups,
@@ -15167,7 +15272,6 @@ main(int argc, char *argv[])
 
     daemonize_complete();
 
-    init_hash_row_locks(&lflow_locks);
     use_parallel_build = can_parallelize_hashes(false);
 
     /* We want to detect (almost) all changes to the ovn-nb db. */
-- 
2.20.1
