If ovn-controller is restarted, it may choose different conntrack zones than had been previously used, which could cause the wrong conntrack entries to be associated with a logical port. This commit stores in the integration bridge's OVS "Bridge" table the mapping to the conntrack zone.
Signed-off-by: Justin Pettit <jpet...@ovn.org> --- ovn/controller/ovn-controller.8.xml | 14 ++++ ovn/controller/ovn-controller.c | 136 ++++++++++++++++++++++++++++++++++-- 2 files changed, 146 insertions(+), 4 deletions(-) diff --git a/ovn/controller/ovn-controller.8.xml b/ovn/controller/ovn-controller.8.xml index 559031f..0484263 100644 --- a/ovn/controller/ovn-controller.8.xml +++ b/ovn/controller/ovn-controller.8.xml @@ -200,6 +200,20 @@ </dd> <dt> + <code>external_ids:ct-zone-*</code> in the <code>Bridge</code> table + </dt> + <dd> + Logical ports and gateway routers are assigned a connection + tracking zone by <code>ovn-controller</code> for stateful + services. To keep state across restarts of + <code>ovn-controller</code>, these keys are stored in the + integration bridge's Bridge table. The name contains a prefix + of <code>ct-zone-</code> followed by the name of the logical + port. The value for this key identifies the zone used for this + port. + </dd> + + <dt> <code>external_ids:ovn-localnet-port</code> in the <code>Port</code> table </dt> diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c index 49821f7..b051a75 100644 --- a/ovn/controller/ovn-controller.c +++ b/ovn/controller/ovn-controller.c @@ -229,9 +229,21 @@ get_ovnsb_remote(struct ovsdb_idl *ovs_idl) } } +enum ct_zone_pending_state { + CT_ZONE_DB_QUEUED, /* Waiting for DB transaction to open. */ + CT_ZONE_DB_SENT, /* Sent and waiting for confirmation from DB. */ +}; + +struct ct_zone_pending_entry { + enum ct_zone_pending_state state; + int zone; + bool add; /* Is the entry being added? */ +}; + static void update_ct_zones(struct sset *lports, struct hmap *patched_datapaths, - struct simap *ct_zones, unsigned long *ct_zone_bitmap) + struct simap *ct_zones, unsigned long *ct_zone_bitmap, + struct shash *pending_ct_zones) { struct simap_node *ct_zone, *ct_zone_next; int scan_start = 1; @@ -260,6 +272,15 @@ update_ct_zones(struct sset *lports, struct hmap *patched_datapaths, /* Delete zones that do not exist in above sset. */ SIMAP_FOR_EACH_SAFE(ct_zone, ct_zone_next, ct_zones) { if (!sset_contains(&all_users, ct_zone->name)) { + VLOG_DBG("removing ct zone %"PRId32" for '%s'", + ct_zone->data, ct_zone->name); + + struct ct_zone_pending_entry *pending = xmalloc(sizeof *pending); + pending->state = CT_ZONE_DB_QUEUED; + pending->zone = ct_zone->data; + pending->add = false; + shash_add(pending_ct_zones, ct_zone->name, pending); + bitmap_set0(ct_zone_bitmap, ct_zone->data); simap_delete(ct_zones, ct_zone); } @@ -271,7 +292,7 @@ update_ct_zones(struct sset *lports, struct hmap *patched_datapaths, /* Assign a unique zone id for each logical port and two zones * to a gateway router. */ SSET_FOR_EACH(user, &all_users) { - size_t zone; + int zone; if (simap_contains(ct_zones, user)) { continue; @@ -286,6 +307,14 @@ update_ct_zones(struct sset *lports, struct hmap *patched_datapaths, } scan_start = zone + 1; + VLOG_DBG("assigning ct zone %"PRId32" to '%s'", zone, user); + + struct ct_zone_pending_entry *pending = xmalloc(sizeof *pending); + pending->state = CT_ZONE_DB_QUEUED; + pending->zone = zone; + pending->add = true; + shash_add(pending_ct_zones, user, pending); + bitmap_set1(ct_zone_bitmap, zone); simap_put(ct_zones, user, zone); @@ -297,6 +326,90 @@ update_ct_zones(struct sset *lports, struct hmap *patched_datapaths, sset_destroy(&all_users); } +static void +commit_ct_zones(struct controller_ctx *ctx, + const struct ovsrec_bridge *br_int, + struct shash *pending_ct_zones) +{ + if (!ctx->ovs_idl_txn) { + return; + } + + struct smap new_ids; + smap_clone(&new_ids, &br_int->external_ids); + + bool updated = false; + struct shash_node *iter; + SHASH_FOR_EACH(iter, pending_ct_zones) { + struct ct_zone_pending_entry *ctzpe = iter->data; + + /* The transaction is open, so any pending entries in the + * CT_ZONE_DB_QUEUED must be sent and any in CT_ZONE_DB_QUEUED + * need to be retried. */ + if (ctzpe->state != CT_ZONE_DB_QUEUED + && ctzpe->state != CT_ZONE_DB_SENT) { + continue; + } + + char *user_str = xasprintf("ct-zone-%s", iter->name); + if (ctzpe->add) { + char *zone_str = xasprintf("%"PRId32, ctzpe->zone); + smap_replace(&new_ids, user_str, zone_str); + free(zone_str); + } else { + smap_remove(&new_ids, user_str); + } + free(user_str); + + ctzpe->state = CT_ZONE_DB_SENT; + updated = true; + } + + if (updated) { + ovsrec_bridge_verify_external_ids(br_int); + ovsrec_bridge_set_external_ids(br_int, &new_ids); + } + smap_destroy(&new_ids); +} + +static void +restore_ct_zones(struct ovsdb_idl *ovs_idl, + struct simap *ct_zones, unsigned long *ct_zone_bitmap) +{ + const struct ovsrec_open_vswitch *cfg; + cfg = ovsrec_open_vswitch_first(ovs_idl); + if (!cfg) { + return; + } + + const char *br_int_name = smap_get_def(&cfg->external_ids, "ovn-bridge", + DEFAULT_BRIDGE_NAME); + + const struct ovsrec_bridge *br_int; + br_int = get_bridge(ovs_idl, br_int_name); + if (!br_int) { + /* If the integration bridge hasn't been defined, assume that + * any existing ct-zone definitions aren't valid. */ + return; + } + + struct smap_node *node; + SMAP_FOR_EACH(node, &br_int->external_ids) { + if (strncmp(node->key, "ct-zone-", 8)) { + continue; + } + + const char *user = node->key + 8; + int zone = atoi(node->value); + + if (user[0] && zone) { + VLOG_DBG("restoring ct zone %"PRId32" for '%s'", zone, user); + bitmap_set1(ct_zone_bitmap, zone); + simap_put(ct_zones, user, zone); + } + } +} + static int64_t get_nb_cfg(struct ovsdb_idl *idl) { @@ -362,6 +475,7 @@ main(int argc, char *argv[]) ovsdb_idl_add_column(ovs_idl_loop.idl, &ovsrec_bridge_col_name); ovsdb_idl_add_column(ovs_idl_loop.idl, &ovsrec_bridge_col_fail_mode); ovsdb_idl_add_column(ovs_idl_loop.idl, &ovsrec_bridge_col_other_config); + ovsdb_idl_add_column(ovs_idl_loop.idl, &ovsrec_bridge_col_external_ids); chassis_register_ovs_idl(ovs_idl_loop.idl); encaps_register_ovs_idl(ovs_idl_loop.idl); binding_register_ovs_idl(ovs_idl_loop.idl); @@ -381,9 +495,11 @@ main(int argc, char *argv[]) /* Initialize connection tracking zones. */ struct simap ct_zones = SIMAP_INITIALIZER(&ct_zones); + struct shash pending_ct_zones = SHASH_INITIALIZER(&pending_ct_zones); unsigned long ct_zone_bitmap[BITMAP_N_LONGS(MAX_CT_ZONES)]; memset(ct_zone_bitmap, 0, sizeof ct_zone_bitmap); bitmap_set1(ct_zone_bitmap, 0); /* Zone 0 is reserved. */ + restore_ct_zones(ovs_idl_loop.idl, &ct_zones, ct_zone_bitmap); unixctl_command_register("ct-zone-list", "", 0, 0, ct_zone_list, &ct_zones); @@ -440,7 +556,8 @@ main(int argc, char *argv[]) pinctrl_run(&ctx, &lports, br_int, chassis_id, &local_datapaths); update_ct_zones(&all_lports, &patched_datapaths, &ct_zones, - ct_zone_bitmap); + ct_zone_bitmap, &pending_ct_zones); + commit_ct_zones(&ctx, br_int, &pending_ct_zones); struct hmap flow_table = HMAP_INITIALIZER(&flow_table); lflow_run(&ctx, &lports, &mcgroups, &local_datapaths, @@ -493,9 +610,20 @@ main(int argc, char *argv[]) pinctrl_wait(&ctx); } ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop); - ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop); ovsdb_idl_track_clear(ovnsb_idl_loop.idl); + + if (ovsdb_idl_loop_commit_and_wait(&ovs_idl_loop) == 1) { + struct shash_node *iter, *iter_next; + SHASH_FOR_EACH_SAFE(iter, iter_next, &pending_ct_zones) { + struct ct_zone_pending_entry *ctzpe = iter->data; + if (ctzpe->state == CT_ZONE_DB_SENT) { + shash_delete(&pending_ct_zones, iter); + free(ctzpe); + } + } + } ovsdb_idl_track_clear(ovs_idl_loop.idl); + poll_block(); if (should_service_stop()) { exiting = true; -- 1.9.1 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev