On Tue, Nov 17, 2020 at 7:17 AM Mark Michelson <[email protected]> wrote:
>
> In certain situations, OVN may coexist with other applications on a
> host. Traffic from OVN and the other applications may then go out a
> shared gateway. If OVN traffic and the other application traffic use
> different conntrack zones for SNAT, then it is possible for the shared
> gateway to assign conflicting source IP:port combinations. By sharing
> the same conntrack zone, there will be no conflicting assignments.
>
> In this commit, we introduce options:snat-ct-zone for northbound logical
> routers. By setting this option, users can explicitly set the conntrack
> zone for the logical router so that it will match the zone used by
> non-OVN traffic on the host.
>
> The biggest side effects of this patch are:
> 1) southbound datapath changes now result in recalculating CT zones in
>    ovn-controller. This can result in recomputing physical flows in more
>    situations than previously.
> 2) The table 65 flow to transition between datapaths is no longer
>    associated with a port binding. This is because the flow refers to
>    the peer datapath's CT zones, which can now be updated due to changes
>    on that datapath. The flow therefore may need to be updated either
>    due to the port binding being changed or the peer datapath being
>    changed.
>
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1892311
> Signed-off-by: Mark Michelson <[email protected]>
> ---
>  controller/ovn-controller.c |  86 +++++++++++++++++++----
>  controller/physical.c       |   2 +-
>  lib/ovn-util.c              |   7 ++
>  lib/ovn-util.h              |   1 +
>  northd/ovn-northd.c         |  10 +++
>  ovn-nb.xml                  |   7 ++
>  tests/ovn.at                | 135 ++++++++++++++++++++++++++++++++++++
>  7 files changed, 234 insertions(+), 14 deletions(-)
>
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index a06cae3cc..25de0c72f 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -531,6 +531,21 @@ update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl 
> *ovnsb_idl,
>      }
>  }
>
> +static void
> +add_pending_ct_zone_entry(struct shash *pending_ct_zones,
> +                          enum ct_zone_pending_state state,
> +                          int zone, bool add, const char *name)
> +{
> +    VLOG_DBG("%s ct zone %"PRId32" for '%s'",
> +             add ? "assigning" : "removing", zone, name);
> +
> +    struct ct_zone_pending_entry *pending = xmalloc(sizeof *pending);
> +    pending->state = state; /* Skip flushing zone. */
> +    pending->zone = zone;
> +    pending->add = add;
> +    shash_add(pending_ct_zones, name, pending);
> +}
> +
>  static void
>  update_ct_zones(const struct sset *lports, const struct hmap 
> *local_datapaths,
>                  struct simap *ct_zones, unsigned long *ct_zone_bitmap,
> @@ -540,6 +555,8 @@ update_ct_zones(const struct sset *lports, const struct 
> hmap *local_datapaths,
>      int scan_start = 1;
>      const char *user;
>      struct sset all_users = SSET_INITIALIZER(&all_users);
> +    struct simap req_snat_zones = SIMAP_INITIALIZER(&req_snat_zones);
> +    unsigned long unreq_snat_zones[BITMAP_N_LONGS(MAX_CT_ZONES)];
>
>      SSET_FOR_EACH(user, lports) {
>          sset_add(&all_users, user);
> @@ -554,6 +571,11 @@ update_ct_zones(const struct sset *lports, const struct 
> hmap *local_datapaths,
>          char *snat = alloc_nat_zone_key(&ld->datapath->header_.uuid, "snat");
>          sset_add(&all_users, dnat);
>          sset_add(&all_users, snat);
> +
> +        int req_snat_zone = datapath_snat_ct_zone(ld->datapath);
> +        if (req_snat_zone >= 0) {
> +            simap_put(&req_snat_zones, snat, req_snat_zone);
> +        }
>          free(dnat);
>          free(snat);
>      }
> @@ -564,15 +586,56 @@ update_ct_zones(const struct sset *lports, const struct 
> hmap *local_datapaths,
>              VLOG_DBG("removing ct zone %"PRId32" for '%s'",
>                       ct_zone->data, ct_zone->name);
>
> -            struct ct_zone_pending_entry *pending = xmalloc(sizeof *pending);
> -            pending->state = CT_ZONE_DB_QUEUED; /* Skip flushing zone. */
> -            pending->zone = ct_zone->data;
> -            pending->add = false;
> -            shash_add(pending_ct_zones, ct_zone->name, pending);
> +            add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_DB_QUEUED,
> +                                      ct_zone->data, false, ct_zone->name);
>
>              bitmap_set0(ct_zone_bitmap, ct_zone->data);
>              simap_delete(ct_zones, ct_zone);
> +        } else if (!simap_find(&req_snat_zones, ct_zone->name)) {
> +            bitmap_set1(unreq_snat_zones, ct_zone->data);
> +        }
> +    }
> +
> +    /* Prioritize requested CT zones */
> +    struct simap_node *snat_req_node;
> +    SIMAP_FOR_EACH (snat_req_node, &req_snat_zones) {
> +        struct simap_node *node = simap_find(ct_zones, snat_req_node->name);
> +        if (node) {
> +            if (node->data == snat_req_node->data) {
> +                /* No change to this request, so no action needed */
> +                continue;
> +            } else {
> +                /* Zone request has changed for this node. delete old entry 
> */
> +                bitmap_set0(ct_zone_bitmap, node->data);
> +                simap_delete(ct_zones, node);
> +            }
>          }
> +
> +        /* Determine if someone already had this zone auto-assigned.
> +         * If so, then they need to give up their assignment since
> +         * that zone is being explicitly requested now.
> +         */
> +        if (bitmap_is_set(unreq_snat_zones, snat_req_node->data)) {
> +            struct simap_node *dup;
> +            struct simap_node *next;
> +            SIMAP_FOR_EACH_SAFE (dup, next, ct_zones) {
> +                if (dup != snat_req_node && dup->data == 
> snat_req_node->data) {
> +                    simap_delete(ct_zones, dup);
> +                    break;
> +                }
> +            }
> +            /* Set this bit to 0 so that if multiple datapaths have requested
> +             * this zone, we don't needlessly double-detect this condition.
> +             */
> +            bitmap_set0(unreq_snat_zones, snat_req_node->data);
> +        }
> +
> +        add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
> +                                  snat_req_node->data, true,
> +                                  snat_req_node->name);
> +
> +        bitmap_set1(ct_zone_bitmap, snat_req_node->data);
> +        simap_put(ct_zones, snat_req_node->name, snat_req_node->data);
>      }
>
>      /* xxx This is wasteful to assign a zone to each port--even if no
> @@ -592,22 +655,18 @@ update_ct_zones(const struct sset *lports, const struct 
> hmap *local_datapaths,
>          if (zone == MAX_CT_ZONES + 1) {
>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>              VLOG_WARN_RL(&rl, "exhausted all ct zones");
> -            return;
> +            break;
>          }
>          scan_start = zone + 1;
>
> -        VLOG_DBG("assigning ct zone %"PRId32" to '%s'", zone, user);
> -
> -        struct ct_zone_pending_entry *pending = xmalloc(sizeof *pending);
> -        pending->state = CT_ZONE_OF_QUEUED;
> -        pending->zone = zone;
> -        pending->add = true;
> -        shash_add(pending_ct_zones, user, pending);
> +        add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
> +                                  zone, true, user);
>
>          bitmap_set1(ct_zone_bitmap, zone);
>          simap_put(ct_zones, user, zone);
>      }
>
> +    simap_destroy(&req_snat_zones);
>      sset_destroy(&all_users);
>  }
>
> @@ -2330,6 +2389,7 @@ main(int argc, char *argv[])
>
>      engine_add_input(&en_ct_zones, &en_ovs_open_vswitch, NULL);
>      engine_add_input(&en_ct_zones, &en_ovs_bridge, NULL);
> +    engine_add_input(&en_ct_zones, &en_sb_datapath_binding, NULL);
>      engine_add_input(&en_ct_zones, &en_runtime_data, NULL);
>
>      engine_add_input(&en_runtime_data, &en_ofctrl_is_connected, NULL);
> diff --git a/controller/physical.c b/controller/physical.c
> index 1bc2c389b..00c4ca4fd 100644
> --- a/controller/physical.c
> +++ b/controller/physical.c
> @@ -926,7 +926,7 @@ consider_port_binding(struct ovsdb_idl_index 
> *sbrec_port_binding_by_name,
>
>          ofctrl_add_flow(flow_table, OFTABLE_LOG_TO_PHY, 100,
>                          binding->header_.uuid.parts[0],
> -                        &match, ofpacts_p, &binding->header_.uuid);
> +                        &match, ofpacts_p, hc_uuid);
>          return;
>      }
>
> diff --git a/lib/ovn-util.c b/lib/ovn-util.c
> index 18aac8da3..f0fb796b4 100644
> --- a/lib/ovn-util.c
> +++ b/lib/ovn-util.c
> @@ -532,6 +532,13 @@ datapath_is_switch(const struct sbrec_datapath_binding 
> *ldp)
>  {
>      return smap_get(&ldp->external_ids, "logical-switch") != NULL;
>  }
> +
> +int
> +datapath_snat_ct_zone(const struct sbrec_datapath_binding *dp)
> +{
> +    return smap_get_int(&dp->external_ids, "snat-ct-zone", -1);
> +}
> +
>
>  struct tnlid_node {
>      struct hmap_node hmap_node;
> diff --git a/lib/ovn-util.h b/lib/ovn-util.h
> index 3496673b2..ea8226571 100644
> --- a/lib/ovn-util.h
> +++ b/lib/ovn-util.h
> @@ -107,6 +107,7 @@ uint32_t ovn_logical_flow_hash(const struct uuid 
> *logical_datapath,
>                                 uint16_t priority,
>                                 const char *match, const char *actions);
>  bool datapath_is_switch(const struct sbrec_datapath_binding *);
> +int datapath_snat_ct_zone(const struct sbrec_datapath_binding *ldp);
>  void ovn_conn_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
>                     const char *argv[] OVS_UNUSED, void *idl_);
>
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 4d4190cb9..8ce756c8a 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -1179,6 +1179,16 @@ ovn_datapath_update_external_ids(struct ovn_datapath 
> *od)
>              smap_add(&ids, "interconn-ts", ts);
>          }
>      }
> +
> +    /* Set snat-ct-zone */
> +    if (od->nbr) {
> +        int nat_default_ct = smap_get_int(&od->nbr->options,
> +                                           "snat-ct-zone", -1);
> +        if (nat_default_ct >= 0) {
> +            smap_add_format(&ids, "snat-ct-zone", "%d", nat_default_ct);
> +        }
> +    }
> +
>      sbrec_datapath_binding_set_external_ids(od->sb, &ids);
>      smap_destroy(&ids);
>  }
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index 5e6c6c7a3..619c0e299 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -1961,6 +1961,13 @@
>          unique key for each datapath by itself.  However, if it is 
> configured,
>          <code>ovn-northd</code> honors the configured value.
>        </column>
> +      <column name="options" key="snat-ct-zone"
> +          type='{"type": "integer", "minInteger": 0, "maxInteger": 65535}'>
> +        Use the requested conntrack zone for SNAT with this router. This can 
> be
> +        useful if egress traffic from the host running OVN comes from both 
> OVN
> +        and other sources. This way, OVN and the other sources can make use 
> of
> +        the same conntrack zone.
> +      </column>
>      </group>
>
>      <group title="Common Columns">
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 378d11921..ca65dfff7 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -22686,3 +22686,138 @@ AT_CHECK([test "$encap_rec_mvtep" == 
> "$encap_rec_mvtep1"], [0], [])
>
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
> +
> +AT_SETUP([ovn -- snat default ct zone])
> +ovn_start
> +
> +net_add n1
> +sim_add hv1
> +ovs-vsctl add-br br-phys
> +as hv1
> +ovn_attach n1 br-phys 192.168.0.10
> +
> +ovn-nbctl ls-add sw0
> +ovn-nbctl lsp-add sw0 sw0-p1
> +ovn-nbctl lsp-set-addresses sw0-p1 "00:00:00:00:00:02 10.0.0.2"
> +
> +ovn-nbctl lr-add gw_router
> +ovn-nbctl set Logical_Router gw_router options:chassis="hv1"
> +
> +ovn-nbctl lrp-add gw_router gw_router-sw0 00:00:00:00:00:01 10.0.0.1/24
> +ovn-nbctl lsp-add sw0 sw0-gw_router
> +ovn-nbctl lsp-set-addresses sw0-gw_router router
> +ovn-nbctl set Logical_Switch_Port sw0-gw_router type=router \
> +                             options:router-port=gw_router-sw0 \
> +

I think for all the above (and below) ovn-nbctl commands you can wrap
them with "check" command [1]


> +ovn-nbctl lr-nat-add gw_router snat 192.168.0.1 10.0.0.0/24
> +
> +as hv1 ovs-vsctl -- add-port br-int hv1-vif1 -- \
> +    set interface hv1-vif1 external-ids:iface-id=sw0-p1
> +
> +ovn-nbctl --wait=hv sync
> +

I think for all the below  commands you can make use of the new helper
functions added recently by Ben.

> +ro_nb_uuid=$(ovn-nbctl get Logical_Router gw_router _uuid)

i.e you can replace the above with

ro_nb_uuid=$(fetch_column Logical_Router _uuid gw_router)

> +sw_nb_uuid=$(ovn-nbctl get Logical_Switch sw0 _uuid)
> +ro_sb_uuid=$(ovn-sbctl --bare --columns=_uuid find Datapath_Binding 
> external-ids:logical-router=${ro_nb_uuid})
> +sw_sb_uuid=$(ovn-sbctl --bare --columns=_uuid find Datapath_Binding 
> external-ids:logical-switch=${sw_nb_uuid})
> +ro_meta=$(ovn-sbctl get Datapath_Binding ${ro_sb_uuid} tunnel_key)
> +ro_meta=$(printf %#x ${ro_meta})
> +sw_meta=$(ovn-sbctl get Datapath_Binding ${sw_sb_uuid} tunnel_key)
> +sw_meta=$(printf %#x ${sw_meta})


With these addressed - Acked-by: Numan Siddique <[email protected]>

[1] - 
https://github.com/ovn-org/ovn/commit/4afe409e95c72187a8f7a755fa19b17237d14818


Thanks
Numan

> +
> +echo "ro_nb_uuid: ${ro_nb_uuid}"
> +echo "sw_nb_uuid: ${sw_nb_uuid}"
> +echo "ro_sb_uuid: ${ro_sb_uuid}"
> +echo "sw_sb_uuid: ${sw_sb_uuid}"
> +echo "ro_meta: ${ro_meta}"
> +echo "sw_meta: ${sw_meta}"
> +
> +as hv1
> +ovs-vsctl list bridge br-int
> +ro_snat_zone=$(printf %#x $(ovs-vsctl get bridge br-int 
> external-ids:ct-zone-${ro_sb_uuid}_snat | tr -d \"))
> +sw_snat_zone=$(ovs-vsctl get bridge br-int 
> external-ids:ct-zone-${sw_sb_uuid}_snat | tr -d \")
> +
> +echo "ro_snat_zone: ${ro_snat_zone}"
> +echo "sw_snat_zone: ${sw_snat_zone}"
> +
> +as hv1 ovs-ofctl dump-flows br-int > offlows_pre
> +AT_CAPTURE_FILE([offlows_pre])
> +# We should have a flow in table 33 that transitions from the ingress 
> pipeline
> +# to the egress pipeline of gw_router.
> +AT_CHECK_UNQUOTED([grep -c 
> "table=33.*metadata=${ro_meta}.*load:${ro_snat_zone}->NXM_NX_REG12[]" 
> offlows_pre], [0], [dnl
> +1
> +])
> +
> +# We should have a flow in table 65 that transitions from the egress pipeline
> +# of sw0 to the ingress pipeline of gw_router.
> +AT_CHECK_UNQUOTED([grep -c 
> "table=65.*metadata=${sw_meta}.*load:${ro_snat_zone}->NXM_NX_REG12[]" 
> offlows_pre], [0], [dnl
> +1
> +])
> +
> +
> +# Request a specific zone for the gateway router. This should then get 
> reflected
> +# both in the OVS database and in the flow table.
> +ovn-nbctl --wait=hv set Logical_Router gw_router options:snat-ct-zone=666
> +
> +ovs-vsctl list bridge br-int
> +
> +as hv1
> +ro_snat_zone=$(ovs-vsctl get bridge br-int 
> external-ids:ct-zone-${ro_sb_uuid}_snat | tr -d \")
> +
> +echo "ro_snat_zone: ${ro_snat_zone}"
> +
> +AT_CHECK([test "${ro_snat_zone}" = "666"], [0], [])
> +
> +ro_snat_zone=$(printf %#x "${ro_snat_zone}")
> +
> +as hv1 ovs-ofctl dump-flows br-int > offlows_post
> +AT_CAPTURE_FILE([offlows_post])
> +# We should have a flow in table 33 that transitions from the ingress 
> pipeline
> +# to the egress pipeline of gw_router.
> +AT_CHECK_UNQUOTED([grep -c 
> "table=33.*metadata=${ro_meta}.*load:${ro_snat_zone}->NXM_NX_REG12[]" 
> offlows_post], [0], [dnl
> +1
> +])
> +
> +# We should have a flow in table 65 that transitions from the egress pipeline
> +# of sw0 to the ingress pipeline of gw_router.
> +AT_CHECK_UNQUOTED([grep -c 
> "table=65.*metadata=${sw_meta}.*load:${ro_snat_zone}->NXM_NX_REG12[]" 
> offlows_post], [0], [dnl
> +1
> +])
> +
> +# Set router to use the zone that had been assigned to the switch. The router
> +# should claim the zone and the switch should change zones.
> +ovn-nbctl --wait=hv set Logical_Router gw_router 
> options:snat-ct-zone=${sw_snat_zone}
> +
> +as hv1
> +ro_snat_zone=$(ovs-vsctl get bridge br-int 
> external-ids:ct-zone-${ro_sb_uuid}_snat | tr -d \")
> +
> +echo "ro_snat_zone: ${ro_snat_zone}"
> +
> +AT_CHECK([test "${ro_snat_zone}" = "${sw_snat_zone}"], [0], [])
> +
> +ro_snat_zone=$(printf %#x "${ro_snat_zone}")
> +
> +ovs-vsctl list bridge br-int
> +
> +sw_new_snat_zone=$(ovs-vsctl get bridge br-int 
> external-ids:ct-zone-${sw_sb_uuid}_snat | tr -d \")
> +
> +echo "sw_new_snat_zone: ${sw_new_snat_zone}"
> +
> +AT_CHECK([test "${sw_new_snat_zone}" != "${sw_snat_zone}"], [0], [])
> +
> +as hv1 ovs-ofctl dump-flows br-int > offlows_stolen
> +AT_CAPTURE_FILE([offlows_stolen])
> +# We should have a flow in table 33 that transitions from the ingress 
> pipeline
> +# to the egress pipeline of gw_router.
> +AT_CHECK_UNQUOTED([grep -c 
> "table=33.*metadata=${ro_meta}.*load:${ro_snat_zone}->NXM_NX_REG12[]" 
> offlows_stolen], [0], [dnl
> +1
> +])
> +
> +# We should have a flow in table 65 that transitions from the egress pipeline
> +# of sw0 to the ingress pipeline of gw_router.
> +AT_CHECK_UNQUOTED([grep -c 
> "table=65.*metadata=${sw_meta}.*load:${ro_snat_zone}->NXM_NX_REG12[]" 
> offlows_stolen], [0], [dnl
> +1
> +])
> +
> +OVN_CLEANUP([hv1])
> +AT_CLEANUP
> --
> 2.25.4
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to