Thanks Mark! I will update the series according to your suggestions after more reviews land to the series.
On Wed, May 3, 2023 at 4:55 PM Mark Michelson <mmich...@redhat.com> wrote: > > On 5/2/23 21:12, Ihar Hrachyshka wrote: > > When a multichassis port belongs to a switch with a localnet port, > > packets originating or directed to the multichassis port are NOT sent > > thorugh the localnet port. Instead, tunneling is enforced in-cluster to > > guarantee delivery of all packets to all chassis of the port. > > > > This behavior has an unfortunate side effect, where - because of > > additional tunnel header added to each packet - the effective MTU of the > > path for multichassis ports changes from what's set as mtu_request. This > > effectively makes OVN to black hole all packets for the port that use > > full capacity of the interface MTU. This breaks usual TCP / UDP > > services, among other things (SSH, iperf sessions etc.) > > > > This patch adds flows so that > > - (in table 38) detect too-big packets (table 38), and then > > - (in table 39) icmp fragmentation needed / too big errors are sent > > back to offending port. > > > > Once the error is received, the sender is expected to adjust the route > > MTU accordingly, sending the next packets with the new path MTU. After a > > multichassis port is re-assigned to a single chassis, the effective path > > MTU is restored to "usual". Peers will eventually see their "learned" > > path MTU cache expire, which will make them switch back to the "usual" > > MTU. > > > > Among other scenarios, this patch helps to maintain existing services > > working during live migration of a VM, if multichassis ports are used. > > (E.g. in OpenStack Nueutron.) > > > > Fixes: 7084cf437421 ("Always funnel multichassis port traffic through > > tunnels") > > > > Signed-off-by: Ihar Hrachyshka <ihrac...@redhat.com> > > --- > > controller/ovn-controller.c | 3 + > > controller/physical.c | 297 +++++++++++++++++++++++++++++++++++- > > controller/physical.h | 1 + > > lib/ovn-util.h | 11 ++ > > tests/ovn.at | 262 +++++++++++++++++++++++++++++++ > > 5 files changed, 567 insertions(+), 7 deletions(-) > > > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > > index c094cb74d..9359925fa 100644 > > --- a/controller/ovn-controller.c > > +++ b/controller/ovn-controller.c > > @@ -4083,6 +4083,9 @@ static void init_physical_ctx(struct engine_node > > *node, > > p_ctx->patch_ofports = &non_vif_data->patch_ofports; > > p_ctx->chassis_tunnels = &non_vif_data->chassis_tunnels; > > > > + struct controller_engine_ctx *ctrl_ctx = > > engine_get_context()->client_ctx; > > + p_ctx->if_mgr = ctrl_ctx->if_mgr; > > + > > pflow_output_get_debug(node, &p_ctx->debug); > > } > > > > diff --git a/controller/physical.c b/controller/physical.c > > index 1b0482e3b..1c1018616 100644 > > --- a/controller/physical.c > > +++ b/controller/physical.c > > @@ -41,6 +41,7 @@ > > #include "lib/ovn-sb-idl.h" > > #include "lib/ovn-util.h" > > #include "ovn/actions.h" > > +#include "if-status.h" > > #include "physical.h" > > #include "pinctrl.h" > > #include "openvswitch/shash.h" > > @@ -91,6 +92,7 @@ physical_register_ovs_idl(struct ovsdb_idl *ovs_idl) > > > > ovsdb_idl_add_table(ovs_idl, &ovsrec_table_interface); > > ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_name); > > + ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_mtu); > > ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_ofport); > > ovsdb_idl_track_add_column(ovs_idl, > > &ovsrec_interface_col_external_ids); > > } > > @@ -1104,6 +1106,273 @@ setup_activation_strategy(const struct > > sbrec_port_binding *binding, > > } > > } > > > > +static size_t > > +encode_start_controller_op(enum action_opcode opcode, bool pause, > > + uint32_t meter_id, struct ofpbuf *ofpacts) > > +{ > > + size_t ofs = ofpacts->size; > > + > > + struct ofpact_controller *oc = ofpact_put_CONTROLLER(ofpacts); > > + oc->max_len = UINT16_MAX; > > + oc->reason = OFPR_ACTION; > > + oc->pause = pause; > > + if (!ovs_feature_is_supported(OVS_DP_METER_SUPPORT)) { > > + meter_id = NX_CTLR_NO_METER; > > + } > > + oc->meter_id = meter_id; > > + > > + struct action_header ah = { .opcode = htonl(opcode) }; > > + ofpbuf_put(ofpacts, &ah, sizeof ah); > > + > > + return ofs; > > +} > > + > > +static void > > +encode_finish_controller_op(size_t ofs, struct ofpbuf *ofpacts) > > +{ > > + struct ofpact_controller *oc = ofpbuf_at_assert(ofpacts, ofs, sizeof > > *oc); > > + ofpacts->header = oc; > > + oc->userdata_len = ofpacts->size - (ofs + sizeof *oc); > > + ofpact_finish_CONTROLLER(ofpacts, &oc); > > +} > > ecnode_start_controller_op() and encode_finish_controller_op() appear to > be copied from lib/actions.c . I think it's reasonable to expose these > functions in lib/actions.h and call them instead of redefining them. > You are right, and I already forgot I did copy them in my initial PoC (back in Oct 2022?) I will make them public and reuse, thanks for spotting it. > > + > > +/* > > + * Insert a flow to determine if an IP packet is too big for the > > corresponding > > + * egress interface. > > + */ > > +static void > > +determine_if_pkt_too_big(struct ovn_desired_flow_table *flow_table, > > + const struct sbrec_port_binding *binding, > > + const struct sbrec_port_binding *mcp, > > + uint16_t mtu, bool is_ipv6, int direction) > > +{ > > + struct ofpbuf ofpacts; > > + ofpbuf_init(&ofpacts, 0); > > + > > + /* Store packet too large flag in reg9[1]. */ > > + struct match match; > > + match_init_catchall(&match); > > + match_set_dl_type(&match, htons(is_ipv6 ? ETH_TYPE_IPV6 : > > ETH_TYPE_IP)); > > + match_set_metadata(&match, htonll(binding->datapath->tunnel_key)); > > + match_set_reg(&match, direction - MFF_REG0, mcp->tunnel_key); > > + > > + /* reg9[1] is REGBIT_PKT_LARGER as defined by northd */ > > + struct ofpact_check_pkt_larger *pkt_larger = > > + ofpact_put_CHECK_PKT_LARGER(&ofpacts); > > + pkt_larger->pkt_len = mtu; > > + pkt_larger->dst.field = mf_from_id(MFF_REG9); > > + pkt_larger->dst.ofs = 1; > > + > > + put_resubmit(OFTABLE_OUTPUT_LARGE_PKT_PROCESS, &ofpacts); > > + ofctrl_add_flow(flow_table, OFTABLE_OUTPUT_LARGE_PKT_DETECT, 100, > > + binding->header_.uuid.parts[0], &match, &ofpacts, > > + &binding->header_.uuid); > > + ofpbuf_uninit(&ofpacts); > > +} > > + > > +/* > > + * Insert a flow to reply with ICMP error for IP packets that are too big > > for > > + * the corresponding egress interface. > > + */ > > +/* > > + * NOTE(ihrachys) This reimplements icmp_error as found in > > + * build_icmperr_pkt_big_flows. We may look into reusing the existing OVN > > + * action for this flow in the future. > > + */ > > +static void > > +reply_imcp_error_if_pkt_too_big(struct ovn_desired_flow_table *flow_table, > > + const struct sbrec_port_binding *binding, > > + const struct sbrec_port_binding *mcp, > > + uint16_t mtu, bool is_ipv6, int direction) > > +{ > > + struct match match; > > + match_init_catchall(&match); > > + match_set_dl_type(&match, htons(is_ipv6 ? ETH_TYPE_IPV6 : > > ETH_TYPE_IP)); > > + match_set_metadata(&match, htonll(binding->datapath->tunnel_key)); > > + match_set_reg(&match, direction - MFF_REG0, mcp->tunnel_key); > > + match_set_reg_masked(&match, MFF_REG9 - MFF_REG0, 1 << 1, 1 << 1); > > + > > + /* Return ICMP error with a part of the original IP packet included. */ > > + struct ofpbuf ofpacts; > > + ofpbuf_init(&ofpacts, 0); > > + size_t oc_offset = encode_start_controller_op( > > + ACTION_OPCODE_ICMP, true, NX_CTLR_NO_METER, &ofpacts); > > + > > + struct ofpbuf inner_ofpacts; > > + ofpbuf_init(&inner_ofpacts, 0); > > + > > + /* The error packet is no longer too large, set REGBIT_PKT_LARGER = 0 > > */ > > + /* reg9[1] is REGBIT_PKT_LARGER as defined by northd */ > > + ovs_be32 value = htonl(0); > > + ovs_be32 mask = htonl(1 << 1); > > + ofpact_put_set_field( > > + &inner_ofpacts, mf_from_id(MFF_REG9), &value, &mask); > > + > > + /* The new error packet is delivered locally */ > > + /* REGBIT_EGRESS_LOOPBACK = 1 */ > > + value = htonl(1 << MLF_ALLOW_LOOPBACK_BIT); > > + mask = htonl(1 << MLF_ALLOW_LOOPBACK_BIT); > > + ofpact_put_set_field( > > + &inner_ofpacts, mf_from_id(MFF_LOG_FLAGS), &value, &mask); > > + > > + /* eth.src <-> eth.dst */ > > + put_stack(MFF_ETH_DST, ofpact_put_STACK_PUSH(&inner_ofpacts)); > > + put_stack(MFF_ETH_SRC, ofpact_put_STACK_PUSH(&inner_ofpacts)); > > + put_stack(MFF_ETH_DST, ofpact_put_STACK_POP(&inner_ofpacts)); > > + put_stack(MFF_ETH_SRC, ofpact_put_STACK_POP(&inner_ofpacts)); > > + > > + /* ip.src <-> ip.dst */ > > + put_stack(is_ipv6 ? MFF_IPV6_DST : MFF_IPV4_DST, > > + ofpact_put_STACK_PUSH(&inner_ofpacts)); > > + put_stack(is_ipv6 ? MFF_IPV6_SRC : MFF_IPV4_SRC, > > + ofpact_put_STACK_PUSH(&inner_ofpacts)); > > + put_stack(is_ipv6 ? MFF_IPV6_DST : MFF_IPV4_DST, > > + ofpact_put_STACK_POP(&inner_ofpacts)); > > + put_stack(is_ipv6 ? MFF_IPV6_SRC : MFF_IPV4_SRC, > > + ofpact_put_STACK_POP(&inner_ofpacts)); > > + > > + /* ip.ttl = 255 */ > > + struct ofpact_ip_ttl *ip_ttl = ofpact_put_SET_IP_TTL(&inner_ofpacts); > > + ip_ttl->ttl = 255; > > + > > + uint16_t frag_mtu = mtu - ETHERNET_OVERHEAD; > > + size_t frag_mtu_oc_offset; > > + if (is_ipv6) { > > + /* icmp6.type = 2 (Packet Too Big) */ > > + /* icmp6.code = 0 */ > > + uint8_t icmp_type = 2; > > + uint8_t icmp_code = 0; > > + ofpact_put_set_field( > > + &inner_ofpacts, mf_from_id(MFF_ICMPV6_TYPE), &icmp_type, NULL); > > + ofpact_put_set_field( > > + &inner_ofpacts, mf_from_id(MFF_ICMPV6_CODE), &icmp_code, NULL); > > + > > + /* icmp6.frag_mtu */ > > + frag_mtu_oc_offset = encode_start_controller_op( > > + ACTION_OPCODE_PUT_ICMP6_FRAG_MTU, true, NX_CTLR_NO_METER, > > + &inner_ofpacts); > > + ovs_be32 frag_mtu_ovs = htonl(frag_mtu); > > + ofpbuf_put(&inner_ofpacts, &frag_mtu_ovs, sizeof(frag_mtu_ovs)); > > + } else { > > + /* icmp4.type = 3 (Destination Unreachable) */ > > + /* icmp4.code = 4 (Fragmentation Needed) */ > > + uint8_t icmp_type = 3; > > + uint8_t icmp_code = 4; > > + ofpact_put_set_field( > > + &inner_ofpacts, mf_from_id(MFF_ICMPV4_TYPE), &icmp_type, NULL); > > + ofpact_put_set_field( > > + &inner_ofpacts, mf_from_id(MFF_ICMPV4_CODE), &icmp_code, NULL); > > + > > + /* icmp4.frag_mtu = */ > > + frag_mtu_oc_offset = encode_start_controller_op( > > + ACTION_OPCODE_PUT_ICMP4_FRAG_MTU, true, NX_CTLR_NO_METER, > > + &inner_ofpacts); > > + ovs_be16 frag_mtu_ovs = htons(frag_mtu); > > + ofpbuf_put(&inner_ofpacts, &frag_mtu_ovs, sizeof(frag_mtu_ovs)); > > + } > > + encode_finish_controller_op(frag_mtu_oc_offset, &inner_ofpacts); > > + > > + /* Finally, submit the ICMP error back to the ingress pipeline */ > > + put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &inner_ofpacts); > > + > > + /* Attach nested actions to ICMP error controller handler */ > > + ofpacts_put_openflow_actions(inner_ofpacts.data, inner_ofpacts.size, > > + &ofpacts, OFP15_VERSION); > > + > > + /* Finalize the ICMP error controller handler */ > > + encode_finish_controller_op(oc_offset, &ofpacts); > > + > > + ofctrl_add_flow(flow_table, OFTABLE_OUTPUT_LARGE_PKT_PROCESS, 100, > > + binding->header_.uuid.parts[0], &match, &ofpacts, > > + &binding->header_.uuid); > > + > > + ofpbuf_uninit(&inner_ofpacts); > > + ofpbuf_uninit(&ofpacts); > > +} > > + > > +static uint16_t > > +get_tunnel_overhead(struct chassis_tunnel const *tun) > > +{ > > + uint16_t overhead = 0; > > + enum chassis_tunnel_type type = tun->type; > > + if (type == GENEVE) { > > + overhead += GENEVE_TUNNEL_OVERHEAD; > > + } else if (type == STT) { > > + overhead += STT_TUNNEL_OVERHEAD; > > + } else if (type == VXLAN) { > > + overhead += VXLAN_TUNNEL_OVERHEAD; > > + } else { > > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); > > + VLOG_WARN_RL(&rl, "Unknown tunnel type %d, can't determine > > overhead " > > + "size for Path MTU Discovery", type); > > + return 0; > > + } > > + overhead += tun->is_ipv6? IPV6_HEADER_LEN : IPV4_HEADER_LEN; > > + return overhead; > > +} > > + > > +static uint16_t > > +get_effective_mtu(const struct sbrec_port_binding *mcp, > > + struct ovs_list *remote_tunnels, > > + const struct if_status_mgr *if_mgr) > > +{ > > + /* Use interface MTU as a base for calculation */ > > + uint16_t iface_mtu = if_status_mgr_iface_get_mtu(if_mgr, > > + mcp->logical_port); > > + if (!iface_mtu) { > > + return 0; > > + } > > + > > + /* Iterate over all peer tunnels and find the biggest tunnel overhead > > */ > > + uint16_t overhead = 0; > > + struct tunnel *tun; > > + LIST_FOR_EACH (tun, list_node, remote_tunnels) { > > + uint16_t tunnel_overhead = get_tunnel_overhead(tun->tun); > > + if (tunnel_overhead > overhead) { > > + overhead = tunnel_overhead; > > + } > > + } > > + if (!overhead) { > > + return 0; > > + } > > + > > + return iface_mtu - overhead; > > +} > > + > > +static void > > +handle_pkt_too_big_for_ip_version(struct ovn_desired_flow_table > > *flow_table, > > + const struct sbrec_port_binding *binding, > > + const struct sbrec_port_binding *mcp, > > + uint16_t mtu, bool is_ipv6) > > +{ > > + /* ingress */ > > + determine_if_pkt_too_big(flow_table, binding, mcp, mtu, is_ipv6, > > + MFF_LOG_INPORT); > > + reply_imcp_error_if_pkt_too_big(flow_table, binding, mcp, mtu, is_ipv6, > > + MFF_LOG_INPORT); > > + > > + /* egress */ > > + determine_if_pkt_too_big(flow_table, binding, mcp, mtu, is_ipv6, > > + MFF_LOG_OUTPORT); > > + reply_imcp_error_if_pkt_too_big(flow_table, binding, mcp, mtu, is_ipv6, > > + MFF_LOG_OUTPORT); > > +} > > + > > +static void > > +handle_pkt_too_big(struct ovn_desired_flow_table *flow_table, > > + struct ovs_list *remote_tunnels, > > + const struct sbrec_port_binding *binding, > > + const struct sbrec_port_binding *mcp, > > + const struct if_status_mgr *if_mgr) > > +{ > > + uint16_t mtu = get_effective_mtu(mcp, remote_tunnels, if_mgr); > > + if (!mtu) { > > + return; > > + } > > + handle_pkt_too_big_for_ip_version(flow_table, binding, mcp, mtu, > > false); > > + handle_pkt_too_big_for_ip_version(flow_table, binding, mcp, mtu, true); > > +} > > + > > static void > > enforce_tunneling_for_multichassis_ports( > > struct local_datapath *ld, > > @@ -1111,7 +1380,8 @@ enforce_tunneling_for_multichassis_ports( > > const struct sbrec_chassis *chassis, > > const struct hmap *chassis_tunnels, > > enum mf_field_id mff_ovn_geneve, > > - struct ovn_desired_flow_table *flow_table) > > + struct ovn_desired_flow_table *flow_table, > > + const struct if_status_mgr *if_mgr) > > { > > if (shash_is_empty(&ld->multichassis_ports)) { > > return; > > @@ -1156,6 +1426,8 @@ enforce_tunneling_for_multichassis_ports( > > binding->header_.uuid.parts[0], &match, &ofpacts, > > &binding->header_.uuid); > > ofpbuf_uninit(&ofpacts); > > + > > + handle_pkt_too_big(flow_table, tuns, binding, mcp, if_mgr); > > } > > > > struct tunnel *tun_elem; > > @@ -1177,6 +1449,7 @@ consider_port_binding(struct ovsdb_idl_index > > *sbrec_port_binding_by_name, > > const struct sbrec_port_binding *binding, > > const struct sbrec_chassis *chassis, > > const struct physical_debug *debug, > > + const struct if_status_mgr *if_mgr, > > struct ovn_desired_flow_table *flow_table, > > struct ofpbuf *ofpacts_p) > > { > > @@ -1602,8 +1875,10 @@ consider_port_binding(struct ovsdb_idl_index > > *sbrec_port_binding_by_name, > > binding->header_.uuid.parts[0], > > &match, ofpacts_p, &binding->header_.uuid); > > > > - enforce_tunneling_for_multichassis_ports( > > - ld, binding, chassis, chassis_tunnels, mff_ovn_geneve, > > flow_table); > > + enforce_tunneling_for_multichassis_ports(ld, binding, chassis, > > + chassis_tunnels, > > + mff_ovn_geneve, > > flow_table, > > + if_mgr); > > > > /* No more tunneling to set up. */ > > goto out; > > @@ -1908,7 +2183,7 @@ physical_eval_port_binding(struct physical_ctx *p_ctx, > > p_ctx->patch_ofports, > > p_ctx->chassis_tunnels, > > pb, p_ctx->chassis, &p_ctx->debug, > > - flow_table, &ofpacts); > > + p_ctx->if_mgr, flow_table, &ofpacts); > > ofpbuf_uninit(&ofpacts); > > } > > > > @@ -2032,7 +2307,7 @@ physical_run(struct physical_ctx *p_ctx, > > p_ctx->patch_ofports, > > p_ctx->chassis_tunnels, binding, > > p_ctx->chassis, &p_ctx->debug, > > - flow_table, &ofpacts); > > + p_ctx->if_mgr, flow_table, &ofpacts); > > } > > > > /* Handle output to multicast groups, in tables 40 and 41. */ > > @@ -2176,11 +2451,19 @@ physical_run(struct physical_ctx *p_ctx, > > ofctrl_add_flow(flow_table, OFTABLE_OUTPUT_LARGE_PKT_DETECT, 0, 0, > > &match, > > &ofpacts, hc_uuid); > > > > + match_init_catchall(&match); > > + match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, > > + MLF_ALLOW_LOOPBACK, MLF_ALLOW_LOOPBACK); > > + ofpbuf_clear(&ofpacts); > > + put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts); > > + ofctrl_add_flow(flow_table, OFTABLE_OUTPUT_LARGE_PKT_PROCESS, 10, 0, > > + &match, &ofpacts, hc_uuid); > > + > > match_init_catchall(&match); > > ofpbuf_clear(&ofpacts); > > put_resubmit(OFTABLE_REMOTE_OUTPUT, &ofpacts); > > - ofctrl_add_flow(flow_table, OFTABLE_OUTPUT_LARGE_PKT_PROCESS, 0, 0, > > &match, > > - &ofpacts, hc_uuid); > > + ofctrl_add_flow(flow_table, OFTABLE_OUTPUT_LARGE_PKT_PROCESS, 0, 0, > > + &match, &ofpacts, hc_uuid); > > > > /* Table 40, priority 150. > > * ======================= > > diff --git a/controller/physical.h b/controller/physical.h > > index f450dca94..396bcb138 100644 > > --- a/controller/physical.h > > +++ b/controller/physical.h > > @@ -57,6 +57,7 @@ struct physical_ctx { > > const struct sbrec_chassis_table *chassis_table; > > const struct sbrec_chassis *chassis; > > const struct sset *active_tunnels; > > + const struct if_status_mgr *if_mgr; > > struct hmap *local_datapaths; > > struct sset *local_lports; > > const struct simap *ct_zones; > > diff --git a/lib/ovn-util.h b/lib/ovn-util.h > > index 7cf861dbc..7ec2bca48 100644 > > --- a/lib/ovn-util.h > > +++ b/lib/ovn-util.h > > @@ -29,6 +29,17 @@ > > #define ROUTE_ORIGIN_CONNECTED "connected" > > #define ROUTE_ORIGIN_STATIC "static" > > > > +#define ETH_HEADER_LENGTH 14 > > +#define ETH_CRC_LENGTH 4 > > +#define ETHERNET_OVERHEAD (ETH_HEADER_LENGTH + ETH_CRC_LENGTH) > > + > > +#define IPV4_HEADER_LEN 20 > > +#define IPV6_HEADER_LEN 40 > > ETH_HEADER_LENGTH, IPV4_HEADER_LEN, and IPV6_HEADER_LEN are already > defined in OVS in lib/packets.h. You can reuse those values instead of > redefining them. > Thanks! I looked for them before adding here, but clearly my search skills are not as great as yours. :) > > + > > +#define GENEVE_TUNNEL_OVERHEAD 38 > > +#define STT_TUNNEL_OVERHEAD 18 > > +#define VXLAN_TUNNEL_OVERHEAD 30 > > + > > struct eth_addr; > > struct nbrec_logical_router_port; > > struct ovsrec_flow_sample_collector_set_table; > > diff --git a/tests/ovn.at b/tests/ovn.at > > index b0439d99e..99ce3dd90 100644 > > --- a/tests/ovn.at > > +++ b/tests/ovn.at > > @@ -15194,6 +15194,268 @@ OVN_CLEANUP([hv1],[hv2],[hv3]) > > AT_CLEANUP > > ]) > > > > +m4_define([MULTICHASSIS_PATH_MTU_DISCOVERY_TEST], > > + [OVN_FOR_EACH_NORTHD([ > > + AT_SETUP([localnet connectivity with multiple requested-chassis, path > > mtu discovery (ip=$1, tunnel=$2, mtu=$3)]) > > + AT_KEYWORDS([multi-chassis]) > > + > > + ovn_start > > + > > + net_add n1 > > + for i in 1 2; do > > + sim_add hv$i > > + as hv$i > > + check ovs-vsctl add-br br-phys > > + if test "x$1" = "xipv6"; then > > + ovn_attach n1 br-phys fd00::$i 64 $2 > > + else > > + ovn_attach n1 br-phys 192.168.0.$i 24 $2 > > + fi > > + check ovs-vsctl set open . > > external-ids:ovn-bridge-mappings=phys:br-phys > > + done > > + > > + first_mac=00:00:00:00:00:01 > > + second_mac=00:00:00:00:00:02 > > + multi1_mac=00:00:00:00:00:f0 > > + multi2_mac=00:00:00:00:00:f1 > > + first_ip=10.0.0.1 > > + second_ip=10.0.0.2 > > + multi1_ip=10.0.0.10 > > + multi2_ip=10.0.0.20 > > + first_ip6=abcd::1 > > + second_ip6=abcd::2 > > + multi1_ip6=abcd::f0 > > + multi2_ip6=abcd::f1 > > + > > + check ovn-nbctl ls-add ls0 > > + check ovn-nbctl lsp-add ls0 first > > + check ovn-nbctl lsp-add ls0 second > > + check ovn-nbctl lsp-add ls0 multi1 > > + check ovn-nbctl lsp-add ls0 multi2 > > + check ovn-nbctl lsp-set-addresses first "${first_mac} ${first_ip} > > ${first_ip6}" > > + check ovn-nbctl lsp-set-addresses second "${second_mac} ${second_ip} > > ${second_ip6}" > > + check ovn-nbctl lsp-set-addresses multi1 "${multi1_mac} ${multi1_ip} > > ${multi1_ip6}" > > + check ovn-nbctl lsp-set-addresses multi2 "${multi2_mac} ${multi2_ip} > > ${multi2_ip6}" > > + > > + check ovn-nbctl lsp-add ls0 public > > + check ovn-nbctl lsp-set-type public localnet > > + check ovn-nbctl lsp-set-addresses public unknown > > + check ovn-nbctl lsp-set-options public network_name=phys > > + > > + check ovn-nbctl lsp-set-options first requested-chassis=hv1 > > + check ovn-nbctl lsp-set-options second requested-chassis=hv2 > > + check ovn-nbctl lsp-set-options multi1 requested-chassis=hv1,hv2 > > + check ovn-nbctl lsp-set-options multi2 requested-chassis=hv1,hv2 > > + > > + as hv1 check ovs-vsctl -- add-port br-int first -- \ > > + set Interface first external-ids:iface-id=first \ > > + options:tx_pcap=hv1/first-tx.pcap \ > > + options:rxq_pcap=hv1/first-rx.pcap \ > > + ofport-request=1 > > + as hv2 check ovs-vsctl -- add-port br-int second -- \ > > + set Interface second external-ids:iface-id=second \ > > + options:tx_pcap=hv2/second-tx.pcap \ > > + options:rxq_pcap=hv2/second-rx.pcap \ > > + ofport-request=2 > > + > > + # Create Migrator interfaces on both hv1 and hv2 > > + for hv in hv1 hv2; do > > + for i in 1 2; do > > + as $hv check ovs-vsctl -- add-port br-int multi${i} -- \ > > + set Interface multi${i} external-ids:iface-id=multi${i} \ > > + options:tx_pcap=$hv/multi${i}-tx.pcap \ > > + options:rxq_pcap=$hv/multi${i}-rx.pcap \ > > + ofport-request=${i}00 > > + done > > + done > > + > > + send_ip_packet() { > > + local inport=${1} hv=${2} eth_src=${3} eth_dst=${4} ipv4_src=${5} > > ipv4_dst=${6} data=${7} fail=${8} > > + packet=$(fmt_pkt " > > + Ether(dst='${eth_dst}', src='${eth_src}') / > > + IP(src='${ipv4_src}', dst='${ipv4_dst}') / > > + ICMP(type=8) / bytes.fromhex('${data}') > > + ") > > + as hv${hv} ovs-appctl netdev-dummy/receive ${inport} ${packet} > > + if [[ x"${fail}" != x0 ]]; then > > + original_ip_frame=$(fmt_pkt " > > + IP(src='${ipv4_src}', dst='${ipv4_dst}') / > > + ICMP(type=8) / bytes.fromhex('${data}') > > + ") > > + # IP(flags=2) means DF (Don't Fragment) = 1 > > + # ICMP(type=3, code=4) means Destination Unreachable, > > Fragmentation Needed > > + packet=$(fmt_pkt " > > + Ether(dst='${eth_src}', src='${eth_dst}') / > > + IP(src='${ipv4_dst}', dst='${ipv4_src}', ttl=255, flags=2, > > id=0) / > > + ICMP(type=3, code=4, nexthopmtu=$3) / > > + bytes.fromhex('${original_ip_frame:0:$((534 * 2))}') > > + ") > > + fi > > + echo ${packet} > > + } > > + > > + send_ip6_packet() { > > + local inport=${1} hv=${2} eth_src=${3} eth_dst=${4} ipv6_src=${5} > > ipv6_dst=${6} data=${7} fail=${8} > > + packet=$(fmt_pkt " > > + Ether(dst='${eth_dst}', src='${eth_src}') / > > + IPv6(src='${ipv6_src}', dst='${ipv6_dst}') / > > + ICMPv6EchoRequest() / bytes.fromhex('${data}') > > + ") > > + as hv${hv} ovs-appctl netdev-dummy/receive ${inport} ${packet} > > + if [[ x"${fail}" != x0 ]]; then > > + original_ip_frame=$(fmt_pkt " > > + IPv6(src='${ipv6_src}', dst='${ipv6_dst}') / > > + ICMPv6EchoRequest() / bytes.fromhex('${data}') > > + ") > > + packet=$(fmt_pkt " > > + Ether(dst='${eth_src}', src='${eth_dst}') / > > + IPv6(src='${ipv6_dst}', dst='${ipv6_src}', hlim=255) / > > + ICMPv6PacketTooBig(mtu=$3) / > > + bytes.fromhex('${original_ip_frame:0:$((1218 * 2))}') > > + ") > > + fi > > + echo ${packet} > > + } > > + > > + reset_env() { > > + for port in first multi1 multi2; do > > + as hv1 reset_pcap_file $port hv1/$port > > + done > > + for port in second multi1 multi2; do > > + as hv2 reset_pcap_file $port hv2/$port > > + done > > + for port in hv1/multi1 hv2/multi1 hv1/multi2 hv2/multi2 hv1/first > > hv2/second; do > > + : > $port.expected > > + done > > + } > > + > > + check_pkts() { > > + for port in hv1/multi1 hv2/multi1 hv1/multi2 hv2/multi2 hv1/first > > hv2/second; do > > + OVN_CHECK_PACKETS_REMOVE_BROADCAST([${port}-tx.pcap], > > [${port}.expected]) > > + done > > + } > > + > > + payload() { > > + echo $(xxd -l ${1} -c ${1} -p < /dev/urandom) > > + } > > + > > + wait_for_ports_up > > + OVN_POPULATE_ARP > > + > > + reset_env > > + > > + AS_BOX([Packets of proper size are delivered from multichassis to > > regular ports]) > > + > > + len=1000 > > + packet=$(send_ip_packet multi1 1 $multi1_mac $first_mac $multi1_ip > > $first_ip $(payload $len) 0) > > + echo $packet >> hv1/first.expected > > + > > + packet=$(send_ip_packet multi1 1 $multi1_mac $second_mac $multi1_ip > > $second_ip $(payload $len) 0) > > + echo $packet >> hv2/second.expected > > + > > + packet=$(send_ip6_packet multi1 1 $multi1_mac $first_mac $multi1_ip6 > > $first_ip6 $(payload $len) 0) > > + echo $packet >> hv1/first.expected > > + > > + packet=$(send_ip6_packet multi1 1 $multi1_mac $second_mac $multi1_ip6 > > $second_ip6 $(payload $len) 0) > > + echo $packet >> hv2/second.expected > > + > > + check_pkts > > + reset_env > > + > > + AS_BOX([Oversized packets are not delivered from multichassis to > > regular ports]) > > + > > + len=3000 > > + packet=$(send_ip_packet multi1 1 $multi1_mac $first_mac $multi1_ip > > $first_ip $(payload $len) 1) > > + echo $packet >> hv1/multi1.expected > > + > > + packet=$(send_ip_packet multi1 1 $multi1_mac $second_mac $multi1_ip > > $second_ip $(payload $len) 1) > > + echo $packet >> hv1/multi1.expected > > + > > + packet=$(send_ip6_packet multi1 1 $multi1_mac $first_mac $multi1_ip6 > > $first_ip6 $(payload $len) 1) > > + echo $packet >> hv1/multi1.expected > > + > > + packet=$(send_ip6_packet multi1 1 $multi1_mac $second_mac $multi1_ip6 > > $second_ip6 $(payload $len) 1) > > + echo $packet >> hv1/multi1.expected > > + > > + check_pkts > > + reset_env > > + > > + AS_BOX([Packets of proper size are delivered from regular to > > multichassis ports]) > > + > > + len=1000 > > + packet=$(send_ip_packet first 1 $first_mac $multi1_mac $first_ip > > $multi1_ip $(payload $len) 0) > > + echo $packet >> hv1/multi1.expected > > + echo $packet >> hv2/multi1.expected > > + > > + packet=$(send_ip_packet second 2 $second_mac $multi1_mac $second_ip > > $multi1_ip $(payload $len) 0) > > + echo $packet >> hv1/multi1.expected > > + echo $packet >> hv2/multi1.expected > > + > > + packet=$(send_ip6_packet first 1 $first_mac $multi1_mac $first_ip6 > > $multi1_ip6 $(payload $len) 0) > > + echo $packet >> hv1/multi1.expected > > + echo $packet >> hv2/multi1.expected > > + > > + packet=$(send_ip6_packet second 2 $second_mac $multi1_mac $second_ip6 > > $multi1_ip6 $(payload $len) 0) > > + echo $packet >> hv1/multi1.expected > > + echo $packet >> hv2/multi1.expected > > + > > + check_pkts > > + reset_env > > + > > + AS_BOX([Oversized packets are not delivered from regular to > > multichassis ports]) > > + > > + len=3000 > > + packet=$(send_ip_packet first 1 $first_mac $multi1_mac $first_ip > > $multi1_ip $(payload $len) 1) > > + echo $packet >> hv1/first.expected > > + > > + packet=$(send_ip_packet second 2 $second_mac $multi1_mac $second_ip > > $multi1_ip $(payload $len) 1) > > + echo $packet >> hv2/second.expected > > + > > + packet=$(send_ip6_packet first 1 $first_mac $multi1_mac $first_ip6 > > $multi1_ip6 $(payload $len) 1) > > + echo $packet >> hv1/first.expected > > + > > + packet=$(send_ip6_packet second 2 $second_mac $multi1_mac $second_ip6 > > $multi1_ip6 $(payload $len) 1) > > + echo $packet >> hv2/second.expected > > + > > + check_pkts > > + reset_env > > + > > + AS_BOX([Packets of proper size are delivered from multichassis to > > multichassis ports]) > > + > > + len=1000 > > + packet=$(send_ip_packet multi1 1 $multi1_mac $multi2_mac $multi1_ip > > $multi2_ip $(payload $len) 0) > > + echo $packet >> hv1/multi2.expected > > + echo $packet >> hv2/multi2.expected > > + > > + packet=$(send_ip6_packet multi1 1 $multi1_mac $multi2_mac $multi1_ip6 > > $multi2_ip6 $(payload $len) 0) > > + echo $packet >> hv1/multi2.expected > > + echo $packet >> hv2/multi2.expected > > + > > + check_pkts > > + reset_env > > + > > + AS_BOX([Oversized packets are not delivered from multichassis to > > multichassis ports]) > > + > > + len=3000 > > + packet=$(send_ip_packet multi1 1 $multi1_mac $multi2_mac $multi1_ip > > $multi2_ip $(payload $len) 1) > > + echo $packet >> hv1/multi1.expected > > + > > + packet=$(send_ip6_packet multi1 1 $multi1_mac $multi2_mac $multi1_ip6 > > $multi2_ip6 $(payload $len) 1) > > + echo $packet >> hv1/multi1.expected > > + > > + check_pkts > > + > > + OVN_CLEANUP([hv1],[hv2]) > > + > > + AT_CLEANUP > > + ])]) > > + > > +# NOTE(ihar) no STT variants because it's not supported by upstream kernels > > +MULTICHASSIS_PATH_MTU_DISCOVERY_TEST([ipv4], [geneve], [1424]) > > +MULTICHASSIS_PATH_MTU_DISCOVERY_TEST([ipv6], [geneve], [1404]) > > +MULTICHASSIS_PATH_MTU_DISCOVERY_TEST([ipv4], [vxlan], [1432]) > > +MULTICHASSIS_PATH_MTU_DISCOVERY_TEST([ipv6], [vxlan], [1412]) > > + > > OVN_FOR_EACH_NORTHD([ > > AT_SETUP([options:activation-strategy for logical port]) > > AT_KEYWORDS([multi-chassis]) > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev