Sync interconnection logical ports and bindings between NB, SB and ISB. With this patch, the OVN interconnection works end to end.
Signed-off-by: Han Zhou <hz...@ovn.org> --- controller/binding.c | 6 +- ic/ovn-ic.c | 369 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/ovn-util.c | 7 + lib/ovn-util.h | 2 + northd/ovn-northd.c | 9 +- ovn-architecture.7.xml | 2 +- ovn-nb.xml | 11 +- tests/ovn-ic.at | 66 +++++++++ 8 files changed, 461 insertions(+), 11 deletions(-) diff --git a/controller/binding.c b/controller/binding.c index aad9d39..347fede 100644 --- a/controller/binding.c +++ b/controller/binding.c @@ -793,11 +793,13 @@ binding_evaluate_port_binding_changes( * - If a regular VIF is unbound from this chassis, the local ovsdb * interface table will be updated, which will trigger recompute. * - * - If the port is not a regular VIF, always trigger recompute. */ + * - If the port is not a regular VIF, and not a "remote" port, + * always trigger recompute. */ if (binding_rec->chassis == chassis_rec || is_our_chassis(chassis_rec, binding_rec, active_tunnels, &lport_to_iface, local_lports) - || strcmp(binding_rec->type, "")) { + || (strcmp(binding_rec->type, "") && strcmp(binding_rec->type, + "remote"))) { changed = true; break; } diff --git a/ic/ovn-ic.c b/ic/ovn-ic.c index 33c430a..d769efb 100644 --- a/ic/ovn-ic.c +++ b/ic/ovn-ic.c @@ -57,6 +57,10 @@ struct ic_context { struct ovsdb_idl_txn *ovnsb_txn; struct ovsdb_idl_txn *ovninb_txn; struct ovsdb_idl_txn *ovnisb_txn; + struct ovsdb_idl_index *nbrec_ls_by_name; + struct ovsdb_idl_index *sbrec_chassis_by_name; + struct ovsdb_idl_index *sbrec_port_binding_by_name; + struct ovsdb_idl_index *isbrec_port_binding_by_ts; }; static const char *ovnnb_db; @@ -392,6 +396,352 @@ gateway_run(struct ic_context *ctx, const struct isbrec_availability_zone *az) shash_destroy(&remote_gws); } +static const struct nbrec_logical_switch * +find_ts_in_nb(struct ic_context *ctx, char *ts_name) +{ + const struct nbrec_logical_switch *key = + nbrec_logical_switch_index_init_row(ctx->nbrec_ls_by_name); + nbrec_logical_switch_index_set_name(key, ts_name); + + const 
struct nbrec_logical_switch *ls; + bool found = false; + NBREC_LOGICAL_SWITCH_FOR_EACH_EQUAL (ls, key, ctx->nbrec_ls_by_name) { + const char *ls_ts_name = smap_get(&ls->other_config, "interconn-ts"); + if (ls_ts_name && !strcmp(ts_name, ls_ts_name)) { + found = true; + break; + } + } + nbrec_logical_switch_index_destroy_row(key); + + if (found) { + return ls; + } + return NULL; +} + +static const struct sbrec_port_binding * +find_sb_pb_by_name(struct ovsdb_idl_index *sbrec_port_binding_by_name, + const char *name) +{ + const struct sbrec_port_binding *key = + sbrec_port_binding_index_init_row(sbrec_port_binding_by_name); + sbrec_port_binding_index_set_logical_port(key, name); + + const struct sbrec_port_binding *pb = + sbrec_port_binding_index_find(sbrec_port_binding_by_name, key); + sbrec_port_binding_index_destroy_row(key); + + return pb; +} + +static const struct sbrec_port_binding * +find_peer_port(struct ic_context *ctx, + const struct sbrec_port_binding *sb_pb) +{ + const char *peer_name = smap_get(&sb_pb->options, "peer"); + if (!peer_name) { + return NULL; + } + + return find_sb_pb_by_name(ctx->sbrec_port_binding_by_name, peer_name); +} + +static const struct sbrec_port_binding * +find_crp_from_lrp(struct ic_context *ctx, + const struct sbrec_port_binding *lrp_pb) +{ + char *crp_name = ovn_chassis_redirect_name(lrp_pb->logical_port); + + const struct sbrec_port_binding *pb = + find_sb_pb_by_name(ctx->sbrec_port_binding_by_name, crp_name); + + free(crp_name); + return pb; +} + +static const struct sbrec_port_binding * +find_crp_for_sb_pb(struct ic_context *ctx, + const struct sbrec_port_binding *sb_pb) +{ + const struct sbrec_port_binding *peer = find_peer_port(ctx, sb_pb); + if (!peer) { + return NULL; + } + + return find_crp_from_lrp(ctx, peer); +} + +static const char * +get_lrp_address_for_sb_pb(struct ic_context *ctx, + const struct sbrec_port_binding *sb_pb) +{ + const struct sbrec_port_binding *peer = find_peer_port(ctx, sb_pb); + if (!peer) { + 
return NULL; + } + + return peer->n_mac ? *peer->mac : NULL; +} + +static const struct sbrec_chassis * +find_sb_chassis(struct ic_context *ctx, const char *name) +{ + const struct sbrec_chassis *key = + sbrec_chassis_index_init_row(ctx->sbrec_chassis_by_name); + sbrec_chassis_index_set_name(key, name); + + const struct sbrec_chassis *chassis = + sbrec_chassis_index_find(ctx->sbrec_chassis_by_name, key); + sbrec_chassis_index_destroy_row(key); + + return chassis; +} + +/* For each local port: + * - Sync from NB to ISB. + * - Sync gateway from SB to ISB. + * - Sync tunnel key from ISB to SB. + */ +static void +sync_local_port(struct ic_context *ctx, + const struct isbrec_port_binding *isb_pb, + const struct sbrec_port_binding *sb_pb) +{ + /* Sync address from NB to ISB */ + const char *address = get_lrp_address_for_sb_pb(ctx, sb_pb); + if (!address) { + VLOG_DBG("Can't get logical router port address for logical" + " switch port %s", sb_pb->logical_port); + if (isb_pb->address[0]) { + isbrec_port_binding_set_address(isb_pb, ""); + } + } else { + if (strcmp(address, isb_pb->address)) { + isbrec_port_binding_set_address(isb_pb, address); + } + } + + /* Sync gateway from SB to ISB */ + const struct sbrec_port_binding *crp = find_crp_for_sb_pb(ctx, sb_pb); + if (crp && crp->chassis) { + if (strcmp(crp->chassis->name, isb_pb->gateway)) { + isbrec_port_binding_set_gateway(isb_pb, crp->chassis->name); + } + } else { + if (isb_pb->gateway[0]) { + isbrec_port_binding_set_gateway(isb_pb, ""); + } + } + + /* Sync back tunnel key from ISB to SB */ + if (sb_pb->tunnel_key != isb_pb->tunnel_key) { + sbrec_port_binding_set_tunnel_key(sb_pb, isb_pb->tunnel_key); + } +} + +/* For each remote port: + * - Sync from ISB to NB + * - Sync gateway from ISB to SB + * - Sync tunnel key from ISB to SB + */ +static void +sync_remote_port(struct ic_context *ctx, + const struct isbrec_port_binding *isb_pb, + const struct nbrec_logical_switch_port *lsp, + const struct sbrec_port_binding *sb_pb) 
+{ + /* Sync address from ISB to NB */ + if (isb_pb->address[0]) { + if (lsp->n_addresses != 1 || + strcmp(isb_pb->address, lsp->addresses[0])) { + nbrec_logical_switch_port_set_addresses( + lsp, (const char **)&isb_pb->address, 1); + } + } else { + if (lsp->n_addresses != 0) { + nbrec_logical_switch_port_set_addresses(lsp, NULL, 0); + } + } + + /* Sync gateway from ISB to SB */ + if (isb_pb->gateway[0]) { + if (!sb_pb->chassis || strcmp(sb_pb->chassis->name, isb_pb->gateway)) { + const struct sbrec_chassis *chassis = + find_sb_chassis(ctx, isb_pb->gateway); + if (!chassis) { + VLOG_DBG("Chassis %s is not found in SB, syncing from ISB " + "to SB skipped for logical port %s.", + isb_pb->gateway, lsp->name); + return; + } + sbrec_port_binding_set_chassis(sb_pb, chassis); + } + } else { + if (sb_pb->chassis) { + sbrec_port_binding_set_chassis(sb_pb, NULL); + } + } + + /* Sync tunnel key from ISB to SB */ + if (sb_pb->tunnel_key != isb_pb->tunnel_key) { + sbrec_port_binding_set_tunnel_key(sb_pb, isb_pb->tunnel_key); + } +} + +static void +create_nb_lsp(struct ic_context *ctx, + const struct isbrec_port_binding *isb_pb, + const struct nbrec_logical_switch *ls) +{ + const struct nbrec_logical_switch_port *lsp = + nbrec_logical_switch_port_insert(ctx->ovnnb_txn); + nbrec_logical_switch_port_set_name(lsp, isb_pb->logical_port); + nbrec_logical_switch_port_set_type(lsp, "remote"); + + bool up = true; + nbrec_logical_switch_port_set_up(lsp, &up, 1); + + if (isb_pb->address[0]) { + nbrec_logical_switch_port_set_addresses( + lsp, (const char **)&isb_pb->address, 1); + } + + nbrec_logical_switch_update_ports_addvalue(ls, lsp); +} + +static void +create_isb_pb(struct ic_context *ctx, + const struct sbrec_port_binding *sb_pb, + const struct isbrec_availability_zone *az, + const char *ts_name, + uint32_t pb_tnl_key) +{ + const struct isbrec_port_binding *isb_pb = + isbrec_port_binding_insert(ctx->ovnisb_txn); + isbrec_port_binding_set_availability_zone(isb_pb, az); + 
isbrec_port_binding_set_transit_switch(isb_pb, ts_name); + isbrec_port_binding_set_logical_port(isb_pb, sb_pb->logical_port); + isbrec_port_binding_set_tunnel_key(isb_pb, pb_tnl_key); + + const char *address = get_lrp_address_for_sb_pb(ctx, sb_pb); + if (address) { + isbrec_port_binding_set_address(isb_pb, address); + } + + const struct sbrec_port_binding *crp = find_crp_for_sb_pb(ctx, sb_pb); + if (crp && crp->chassis) { + isbrec_port_binding_set_gateway(isb_pb, crp->chassis->name); + } + + /* XXX: Sync encap so that multiple encaps can be used for the same + * gateway. However, it is not needed for now, since we don't yet + * support specifying encap type/ip for gateway chassis or ha-chassis + * for logical router port in NB DB, and now encap should always be + * empty. The sync can be added if we add such support for gateway + * chassis/ha-chassis in NB DB. */ +} + +static const struct sbrec_port_binding * +find_lsp_in_sb(struct ic_context *ctx, + const struct nbrec_logical_switch_port *lsp) +{ + return find_sb_pb_by_name(ctx->sbrec_port_binding_by_name, lsp->name); +} + +static uint32_t +allocate_port_key(struct hmap *pb_tnlids) +{ + static uint32_t hint; + return ovn_allocate_tnlid(pb_tnlids, "transit port", + 1, (1u << 15) - 1, &hint); +} + +static void +port_binding_run(struct ic_context *ctx, + const struct isbrec_availability_zone *az) +{ + if (!ctx->ovnisb_txn || !ctx->ovnnb_txn || !ctx->ovnsb_txn) { + return; + } + + const struct inbrec_transit_switch *ts; + INBREC_TRANSIT_SWITCH_FOR_EACH (ts, ctx->ovninb_idl) { + const struct nbrec_logical_switch *ls = find_ts_in_nb(ctx, ts->name); + if (!ls) { + VLOG_DBG("Transit switch %s not found in NB.", ts->name); + continue; + } + struct shash local_pbs = SHASH_INITIALIZER(&local_pbs); + struct shash remote_pbs = SHASH_INITIALIZER(&remote_pbs); + struct hmap pb_tnlids = HMAP_INITIALIZER(&pb_tnlids); + const struct isbrec_port_binding *isb_pb; + const struct isbrec_port_binding *isb_pb_key = + 
isbrec_port_binding_index_init_row(ctx->isbrec_port_binding_by_ts); + isbrec_port_binding_index_set_transit_switch(isb_pb_key, ts->name); + + ISBREC_PORT_BINDING_FOR_EACH_EQUAL (isb_pb, isb_pb_key, + ctx->isbrec_port_binding_by_ts) { + if (isb_pb->availability_zone == az) { + shash_add(&local_pbs, isb_pb->logical_port, isb_pb); + } else { + shash_add(&remote_pbs, isb_pb->logical_port, isb_pb); + } + ovn_add_tnlid(&pb_tnlids, isb_pb->tunnel_key); + } + isbrec_port_binding_index_destroy_row(isb_pb_key); + + const struct nbrec_logical_switch_port *lsp; + for (int i = 0; i < ls->n_ports; i++) { + lsp = ls->ports[i]; + + const struct sbrec_port_binding *sb_pb = find_lsp_in_sb(ctx, lsp); + if (!strcmp(lsp->type, "router")) { + /* The port is local. */ + if (!sb_pb) { + continue; + } + isb_pb = shash_find_and_delete(&local_pbs, lsp->name); + if (!isb_pb) { + uint32_t pb_tnl_key = allocate_port_key(&pb_tnlids); + create_isb_pb(ctx, sb_pb, az, ts->name, pb_tnl_key); + } else { + sync_local_port(ctx, isb_pb, sb_pb); + } + } else if (!strcmp(lsp->type, "remote")) { + /* The port is remote. 
*/ + isb_pb = shash_find_and_delete(&remote_pbs, lsp->name); + if (!isb_pb) { + nbrec_logical_switch_update_ports_delvalue(ls, lsp); + } else { + if (!sb_pb) { + continue; + } + sync_remote_port(ctx, isb_pb, lsp, sb_pb); + } + } else { + VLOG_DBG("Ignore lsp %s on ts %s with type %s.", + lsp->name, ts->name, lsp->type); + } + } + + /* Delete extra port-binding from ISB */ + struct shash_node *node; + SHASH_FOR_EACH (node, &local_pbs) { + isbrec_port_binding_delete(node->data); + } + + /* Create lsp in NB for remote ports */ + SHASH_FOR_EACH (node, &remote_pbs) { + create_nb_lsp(ctx, node->data, ls); + } + + shash_destroy(&local_pbs); + shash_destroy(&remote_pbs); + ovn_destroy_tnlids(&pb_tnlids); + } +} + static void ovn_db_run(struct ic_context *ctx) { @@ -404,6 +754,7 @@ ovn_db_run(struct ic_context *ctx) ts_run(ctx); gateway_run(ctx, az); + port_binding_run(ctx, az); } static void @@ -551,6 +902,20 @@ main(int argc, char *argv[]) struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER( ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, true, true)); + /* Create IDL indexes */ + struct ovsdb_idl_index *nbrec_ls_by_name + = ovsdb_idl_index_create1(ovnnb_idl_loop.idl, + &nbrec_logical_switch_col_name); + struct ovsdb_idl_index *sbrec_port_binding_by_name + = ovsdb_idl_index_create1(ovnsb_idl_loop.idl, + &sbrec_port_binding_col_logical_port); + struct ovsdb_idl_index *sbrec_chassis_by_name + = ovsdb_idl_index_create1(ovnsb_idl_loop.idl, + &sbrec_chassis_col_name); + struct ovsdb_idl_index *isbrec_port_binding_by_ts + = ovsdb_idl_index_create1(ovnisb_idl_loop.idl, + &isbrec_port_binding_col_transit_switch); + /* Ensure that only a single ovn-ic is active in the deployment by * acquiring a lock called "ovn_ic" on the southbound database * and then only performing DB transactions if the lock is held. 
*/ @@ -571,6 +936,10 @@ main(int argc, char *argv[]) .ovninb_txn = ovsdb_idl_loop_run(&ovninb_idl_loop), .ovnisb_idl = ovnisb_idl_loop.idl, .ovnisb_txn = ovsdb_idl_loop_run(&ovnisb_idl_loop), + .nbrec_ls_by_name = nbrec_ls_by_name, + .sbrec_port_binding_by_name = sbrec_port_binding_by_name, + .sbrec_chassis_by_name = sbrec_chassis_by_name, + .isbrec_port_binding_by_ts = isbrec_port_binding_by_ts, }; if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) { diff --git a/lib/ovn-util.c b/lib/ovn-util.c index 5333fbb..e2f6601 100644 --- a/lib/ovn-util.c +++ b/lib/ovn-util.c @@ -404,6 +404,7 @@ static const char *OVN_NB_LSP_TYPES[] = { "vtep", "external", "virtual", + "remote", }; bool @@ -508,3 +509,9 @@ ovn_allocate_tnlid(struct hmap *set, const char *name, uint32_t min, VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name); return 0; } + +char * +ovn_chassis_redirect_name(const char *port_name) +{ + return xasprintf("cr-%s", port_name); +} diff --git a/lib/ovn-util.h b/lib/ovn-util.h index b9bda8d..233362b 100644 --- a/lib/ovn-util.h +++ b/lib/ovn-util.h @@ -100,4 +100,6 @@ void ovn_destroy_tnlids(struct hmap *tnlids); void ovn_add_tnlid(struct hmap *set, uint32_t tnlid); uint32_t ovn_allocate_tnlid(struct hmap *set, const char *name, uint32_t min, uint32_t max, uint32_t *hint); + +char *ovn_chassis_redirect_name(const char *port_name); #endif diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index fd0b081..493a825 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -1143,12 +1143,6 @@ ovn_port_allocate_key(struct ovn_datapath *od) 1, (1u << 15) - 1, &od->port_key_hint); } -static char * -chassis_redirect_name(const char *port_name) -{ - return xasprintf("cr-%s", port_name); -} - static bool ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn) { @@ -2026,7 +2020,8 @@ join_logical_ports(struct northd_context *ctx, continue; } - char *redirect_name = chassis_redirect_name(nbrp->name); + char *redirect_name = + 
ovn_chassis_redirect_name(nbrp->name); struct ovn_port *crp = ovn_port_find(ports, redirect_name); if (crp) { crp->derived = true; diff --git a/ovn-architecture.7.xml b/ovn-architecture.7.xml index 56b2167..417acbf 100644 --- a/ovn-architecture.7.xml +++ b/ovn-architecture.7.xml @@ -1808,7 +1808,7 @@ </li> </ol> - <h2>OVN Deployments Interconnection (TODO)</h2> + <h2>OVN Deployments Interconnection</h2> <p> It is not uncommon for an operator to deploy multiple OVN clusters, for diff --git a/ovn-nb.xml b/ovn-nb.xml index a24a587..89678d4 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -514,7 +514,16 @@ parent owning the <code>virtual ip</code>. </li> </ul> - </dd> + </dd> + + <dt><code>remote</code></dt> + <dd> + A remote port models a port that resides remotely on another + OVN, which is on the other side of a transit logical switch for OVN + interconnection. Ports of this type are created by + <code>ovn-ic</code> instead of by CMS. Any change to the port will + be automatically overwritten by <code>ovn-ic</code>. + </dd> </dl> </column> </group> diff --git a/tests/ovn-ic.at b/tests/ovn-ic.at index e5640e4..a620427 100644 --- a/tests/ovn-ic.at +++ b/tests/ovn-ic.at @@ -124,3 +124,69 @@ OVN_CLEANUP_SBOX(gw2) OVN_CLEANUP_IC([az1], [az2]) AT_CLEANUP + + +AT_SETUP([ovn-ic -- port sync]) +AT_SKIP_IF([test $HAVE_PYTHON = no]) + +ovn_init_ic_db +ovn-inbctl ts-add ts1 +net_add n1 +ovn_start az1 +ovn_start az2 +sim_add gw1 +as gw1 +ovs-vsctl add-br br-phys +ovn_az_attach az1 n1 br-phys 192.168.0.1 +ovs-vsctl set open . 
external-ids:is-interconn=true + +ovn_as az1 +OVS_WAIT_UNTIL([ovn-sbctl list datapath_binding | grep interconn-ts | grep ts1]) + +# Create LRP and connect to TS +ovn-nbctl lr-add lr1 +ovn-nbctl lrp-add lr1 lrp-lr1-ts1 aa:aa:aa:aa:aa:01 169.254.100.1/24 +ovn-nbctl lsp-add ts1 lsp-ts1-lr1 +ovn-nbctl lsp-set-addresses lsp-ts1-lr1 router +ovn-nbctl lsp-set-type lsp-ts1-lr1 router +ovn-nbctl lsp-set-options lsp-ts1-lr1 router-port=lrp-lr1-ts1 + +AT_CHECK([ovn_as az2 ovn-nbctl show | uuidfilt], [0], [dnl +switch <0> (ts1) + port lsp-ts1-lr1 + type: remote + addresses: [["aa:aa:aa:aa:aa:01 169.254.100.1/24"]] +]) + +AT_CHECK([ovn_as az2 ovn-sbctl -f csv -d bare --no-headings --columns logical_port,type list port_binding], [0], [dnl +lsp-ts1-lr1,remote +]) + +ovn-nbctl lrp-set-gateway-chassis lrp-lr1-ts1 gw1 +OVS_WAIT_UNTIL([ovn_as az2 ovn-sbctl show | grep lsp-ts1-lr1]) + +ovn-nbctl lrp-del-gateway-chassis lrp-lr1-ts1 gw1 +OVS_WAIT_WHILE([ovn_as az2 ovn-sbctl show | grep lsp-ts1-lr1]) + +ovn-nbctl set logical_router_port lrp-lr1-ts1 mac="\"aa:aa:aa:aa:aa:02\"" \ + networks="169.254.100.2/24 169.254.200.3/24" +OVS_WAIT_UNTIL([ovn_as az2 ovn-nbctl show | grep "aa:aa:aa:aa:aa:02 169.254.100.2/24 169.254.200.3/24"]) + +# Delete the router port from az1; the remote port in az2 should still remain +# but lose its address. +ovn-nbctl lrp-del lrp-lr1-ts1 +OVS_WAIT_WHILE([ovn_as az2 ovn-nbctl show | grep "aa:aa:aa:aa:aa:02 169.254.100.2/24 169.254.200.3/24"]) +AT_CHECK([ovn_as az2 ovn-nbctl show | uuidfilt], [0], [dnl +switch <0> (ts1) + port lsp-ts1-lr1 + type: remote +]) + +# Delete the lsp from az1; the remote port in az2 should be gone. +ovn-nbctl lsp-del lsp-ts1-lr1 +OVS_WAIT_WHILE([ovn_as az2 ovn-nbctl show | grep lsp-ts1-lr1]) + +OVN_CLEANUP_SBOX(gw1) +OVN_CLEANUP_IC([az1], [az2]) + +AT_CLEANUP -- 2.1.0 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev