Introduce the nexthop identifier in the ct_label.label field for ecmp-symmetric replies connections. This field will be used by ovn-controller to track ct entries and to flush them if requested by the CMS (e.g. removing the related static routes).
Acked-by: Ales Musil <amu...@redhat.com> Signed-off-by: Lorenzo Bianconi <lorenzo.bianc...@redhat.com> --- northd/en-lflow.c | 3 +++ northd/inc-proc-northd.c | 1 + northd/northd.c | 35 +++++++++++++++++------- northd/northd.h | 1 + tests/ovn.at | 4 +-- tests/system-ovn.at | 58 +++++++++++++++++++++++----------------- 6 files changed, 66 insertions(+), 36 deletions(-) diff --git a/northd/en-lflow.c b/northd/en-lflow.c index 3dba5034b..b4df49076 100644 --- a/northd/en-lflow.c +++ b/northd/en-lflow.c @@ -54,6 +54,8 @@ lflow_get_input_data(struct engine_node *node, engine_get_input_data("lr_stateful", node); struct ed_type_ls_stateful *ls_stateful_data = engine_get_input_data("ls_stateful", node); + struct ecmp_nexthop_data *nexthop_data = + engine_get_input_data("ecmp_nexthop", node); lflow_input->sbrec_logical_flow_table = EN_OVSDB_GET(engine_get_input("SB_logical_flow", node)); @@ -83,6 +85,7 @@ lflow_get_input_data(struct engine_node *node, lflow_input->parsed_routes = &static_routes_data->parsed_routes; lflow_input->route_tables = &static_routes_data->route_tables; lflow_input->route_policies = &route_policies_data->route_policies; + lflow_input->nexthops_table = &nexthop_data->nexthops; struct ed_type_global_config *global_config = engine_get_input_data("global_config", node); diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c index 22735bead..9a25f50c4 100644 --- a/northd/inc-proc-northd.c +++ b/northd/inc-proc-northd.c @@ -277,6 +277,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, engine_add_input(&en_lflow, &en_bfd_sync, NULL); engine_add_input(&en_lflow, &en_route_policies, NULL); engine_add_input(&en_lflow, &en_static_routes, NULL); + engine_add_input(&en_lflow, &en_ecmp_nexthop, NULL); engine_add_input(&en_lflow, &en_global_config, node_global_config_handler); engine_add_input(&en_lflow, &en_northd, lflow_northd_handler); diff --git a/northd/northd.c b/northd/northd.c index 10e1dc60a..72c40d21e 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -10791,7 +10791,8 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows, struct ovn_port *out_port, const struct parsed_route *route, struct ds *route_match, - struct lflow_ref *lflow_ref) + struct lflow_ref *lflow_ref, + struct simap *nexthops_table) { const struct nbrec_logical_router_static_route *st_route = route->route; struct ds match = DS_EMPTY_INITIALIZER; @@ -10826,9 +10827,15 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows, ds_put_cstr(&match, " && !ct.rpl && (ct.new || ct.est)"); ds_put_format(&actions, "ct_commit { ct_label.ecmp_reply_eth = eth.src; " - "ct_mark.ecmp_reply_port = %" PRId64 ";}; " - "next;", + "ct_mark.ecmp_reply_port = %" PRId64 ";", out_port->sb->tunnel_key); + + struct simap_node *n = simap_find(nexthops_table, st_route->nexthop); + if (n) { + ds_put_format(&actions, " ct_label.label = %d;", n->data); + } + ds_put_cstr(&actions, " }; next;"); + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100, ds_cstr(&match), ds_cstr(&actions), &st_route->header_, @@ -10885,7 +10892,8 @@ add_ecmp_symmetric_reply_flows(struct lflow_table *lflows, static void build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, const struct hmap *lr_ports, struct ecmp_groups_node *eg, - struct lflow_ref *lflow_ref) + struct lflow_ref *lflow_ref, + struct simap *nexthops_table) { bool is_ipv4 = IN6_IS_ADDR_V4MAPPED(&eg->prefix); @@ -10942,7 +10950,7 @@ build_ecmp_route_flow(struct lflow_table *lflows, struct ovn_datapath *od, out_port->key)) { add_ecmp_symmetric_reply_flows(lflows, od, lrp_addr_s, out_port, route_, &route_match, - lflow_ref); + lflow_ref, nexthops_table); } ds_clear(&match); ds_put_format(&match, REG_ECMP_GROUP_ID" == %"PRIu16" && " @@ -12830,7 +12838,8 @@ static void build_static_route_flows_for_lrouter( struct ovn_datapath *od, struct lflow_table *lflows, const struct hmap *lr_ports, struct hmap *parsed_routes, - struct simap *route_tables, struct lflow_ref *lflow_ref) + struct simap *route_tables, struct lflow_ref *lflow_ref, + struct simap *nexthops_table) { ovs_assert(od->nbr); ovn_lflow_add_default_drop(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, @@ -12872,7 +12881,8 @@ build_static_route_flows_for_lrouter( HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) { /* add a flow in IP_ROUTING, and one flow for each member in * IP_ROUTING_ECMP. */ - build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref); + build_ecmp_route_flow(lflows, od, lr_ports, group, lflow_ref, + nexthops_table); } const struct unique_routes_node *ur; HMAP_FOR_EACH (ur, hmap_node, &unique_routes) { @@ -16066,6 +16076,7 @@ struct lswitch_flow_build_info { struct hmap *parsed_routes; struct hmap *route_policies; struct simap *route_tables; + struct simap *nexthops_table; }; /* Helper function to combine all lflow generation which is iterated by @@ -16112,7 +16123,7 @@ build_lswitch_and_lrouter_iterate_by_lr(struct ovn_datapath *od, build_ip_routing_pre_flows_for_lrouter(od, lsi->lflows, NULL); build_static_route_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports, lsi->parsed_routes, lsi->route_tables, - NULL); + NULL, lsi->nexthops_table); build_mcast_lookup_flows_for_lrouter(od, lsi->lflows, &lsi->match, &lsi->actions, NULL); build_ingress_policy_flows_for_lrouter(od, lsi->lflows, lsi->lr_ports, @@ -16433,7 +16444,8 @@ build_lswitch_and_lrouter_flows( const char *svc_monitor_mac, struct hmap *parsed_routes, struct hmap *route_policies, - struct simap *route_tables) + struct simap *route_tables, + struct simap *nexthops_table) { char *svc_check_match = xasprintf("eth.dst == %s", svc_monitor_mac); @@ -16470,6 +16482,7 @@ build_lswitch_and_lrouter_flows( lsiv[index].parsed_routes = parsed_routes; lsiv[index].route_tables = route_tables; lsiv[index].route_policies = route_policies; + lsiv[index].nexthops_table = nexthops_table; ds_init(&lsiv[index].match); ds_init(&lsiv[index].actions); @@ -16515,6 +16528,7 @@ build_lswitch_and_lrouter_flows( .route_policies = route_policies, .match = DS_EMPTY_INITIALIZER, .actions = DS_EMPTY_INITIALIZER, + .nexthops_table = nexthops_table, }; /* Combined build - all lflow generation from lswitch and lrouter @@ -16677,7 +16691,8 @@ void build_lflows(struct ovsdb_idl_txn *ovnsb_txn, input_data->svc_monitor_mac, input_data->parsed_routes, input_data->route_policies, - input_data->route_tables); + input_data->route_tables, + input_data->nexthops_table); if (parallelization_state == STATE_INIT_HASH_SIZES) { parallelization_state = STATE_USE_PARALLELIZATION; diff --git a/northd/northd.h b/northd/northd.h index 205793e56..c3b9c0464 100644 --- a/northd/northd.h +++ b/northd/northd.h @@ -218,6 +218,7 @@ struct lflow_input { struct hmap *parsed_routes; struct hmap *route_policies; struct simap *route_tables; + struct simap *nexthops_table; }; extern int parallelization_state; diff --git a/tests/ovn.at b/tests/ovn.at index 13b393932..230096d57 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -28590,7 +28590,7 @@ AT_CHECK([ for hv in 1 2; do grep table=$ecmp_stateful hv${hv}flows | \ grep "priority=100" | \ - grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_MARK\\[[16..31\\]]))" + grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_MARK\\[[16..31\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[96..127\\]]))" grep table=$arp_resolve hv${hv}flows | \ grep "priority=200" | \ @@ -28719,7 +28719,7 @@ AT_CHECK([ for hv in 1 2; do grep table=$ecmp_stateful hv${hv}flows | \ grep "priority=100" | \ - grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_MARK\\[[16..31\\]]))" + grep -c "ct(commit,zone=NXM_NX_REG11\\[[0..15\\]],.*exec(move:NXM_OF_ETH_SRC\\[[\\]]->NXM_NX_CT_LABEL\\[[32..79\\]],load:0x[[0-9]]->NXM_NX_CT_MARK\\[[16..31\\]],load:0x[[0-9]]->NXM_NX_CT_LABEL\\[[96..127\\]]))" grep table=$arp_resolve hv${hv}flows | \ grep "priority=200" | \ diff --git a/tests/system-ovn.at b/tests/system-ovn.at index ddb3d14e9..063769ccd 100644 --- a/tests/system-ovn.at +++ b/tests/system-ovn.at @@ -6172,19 +6172,21 @@ NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.2 | FORMAT_PING], \ # and just ensure that the known ethernet address is present. AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.0.1) | \ sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | -sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl -icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x401020400000000 -tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x401020400000000,protoinfo=(state=<cleared>) +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | +sed -e 's/labels=0x[[0-9]]/labels=0x?/'], [0], [dnl +icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x?000000000401020400000000 +tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x?000000000401020400000000,protoinfo=(state=<cleared>) ]) # Ensure datapaths show conntrack states as expected # Like with conntrack entries, we shouldn't try to predict # port binding tunnel keys. So omit them from expected labels. -ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x401020400000000/.*)' -AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x401020400000000/.*)' -c], [0], [dnl +AT_CHECK([ovs-appctl dpctl/dump-flows | sed -e 's/label=0x[[0-9]]/label=0x?/' | \ +grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x?000000000401020400000000/.*)' -c], [0], [dnl 2 ]) -AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x401020400000000)' -c], [0], [dnl +AT_CHECK([[ovs-appctl dpctl/dump-flows | sed -e 's/ct_label(0x[0-9]/ct_label(0x?/' | \ +grep 'ct_state(-new+est+rpl+trk).*ct_label(0x?000000000401020400000000)' -c]], [0], [dnl 2 ]) @@ -6203,18 +6205,21 @@ NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.2 | FORMAT_PING], \ [0], [dnl 3 packets transmitted, 3 received, 0% packet loss, time 0ms ]) -AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x1001020400000000/.*)' -c], [0], [dnl +AT_CHECK([ovs-appctl dpctl/dump-flows | sed -e 's/label=0x[[0-9]]/label=0x?/' | \ +grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x?000000001001020400000000/.*)' -c], [0], [dnl 2 ]) -AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x1001020400000000)' -c], [0], [dnl +AT_CHECK([[ovs-appctl dpctl/dump-flows | sed -e 's/ct_label(0x[0-9]/ct_label(0x?/' | \ +grep 'ct_state(-new+est+rpl+trk).*ct_label(0x?000000001001020400000000)' -c]], [0], [dnl 2 ]) -AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 0x1001020400000000 | FORMAT_CT(172.16.0.1) | \ +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 1001020400000000 | FORMAT_CT(172.16.0.1) | \ sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | -sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl -icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000 -tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000,protoinfo=(state=<cleared>) +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | +sed -e 's/labels=0x[[0-9]]/labels=0x?/' | sort], [0], [dnl +icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x?000000001001020400000000 +tcp,orig=(src=172.16.0.1,dst=10.0.0.2,sport=<cleared>,dport=<cleared>),reply=(src=10.0.0.2,dst=172.16.0.1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x?000000001001020400000000,protoinfo=(state=<cleared>) ]) # Check entries in table 76 and 77 expires w/o traffic OVS_WAIT_UNTIL([ @@ -6373,11 +6378,12 @@ NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 fd01::2 | FORMAT_PING], \ # Ensure datapaths show conntrack states as expected # Like with conntrack entries, we shouldn't try to predict # port binding tunnel keys. So omit them from expected labels. -AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x401020400000000/.*)' -c], [0], [dnl +AT_CHECK([ovs-appctl dpctl/dump-flows | sed -e 's/label=0x[[0-9]]/label=0x?/' | \ +grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x?000000000401020400000000/.*)' -c], [0], [dnl 2 ]) - -AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x401020400000000)' -c], [0], [dnl +AT_CHECK([[ovs-appctl dpctl/dump-flows | sed -e 's/ct_label(0x[0-9]/ct_label(0x?/' | \ +grep 'ct_state(-new+est+rpl+trk).*ct_label(0x?000000000401020400000000)' -c]], [0], [dnl 2 ]) @@ -6386,9 +6392,10 @@ AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_lab # and just ensure that the known ethernet address is present. AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fd01::2) | \ sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | -sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | sort], [0], [dnl -icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x401020400000000 -tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x401020400000000,protoinfo=(state=<cleared>) +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | +sed -e 's/labels=0x[[0-9]]/labels=0x?/' | sort], [0], [dnl +icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x?000000000401020400000000 +tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x?000000000401020400000000,protoinfo=(state=<cleared>) ]) # Flush conntrack entries for easier output parsing of next test. @@ -6405,18 +6412,21 @@ NS_CHECK_EXEC([bob1], [ping -q -c 3 -i 0.3 -w 2 fd01::2 | FORMAT_PING], \ 3 packets transmitted, 3 received, 0% packet loss, time 0ms ]) -AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x1001020400000000/.*)' -c], [0], [dnl +AT_CHECK([ovs-appctl dpctl/dump-flows | sed -e 's/label=0x[[0-9]]/label=0x?/' | \ +grep 'ct_state(+new-est-rpl+trk).*ct(.*label=0x?000000001001020400000000/.*)' -c], [0], [dnl 2 ]) -AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'ct_state(-new+est+rpl+trk).*ct_label(0x1001020400000000)' -c], [0], [dnl +AT_CHECK([[ovs-appctl dpctl/dump-flows | sed -e 's/ct_label(0x[0-9]/ct_label(0x?/' | \ +grep 'ct_state(-new+est+rpl+trk).*ct_label(0x?000000001001020400000000)' -c]], [0], [dnl 2 ]) -AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 0x1001020400000000 | FORMAT_CT(fd01::2) | \ +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 1001020400000000 | FORMAT_CT(fd01::2) | \ sed -e 's/zone=[[0-9]]*/zone=<cleared>/' | -sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl -icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000 -tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000,protoinfo=(state=<cleared>) +sed -e 's/mark=[[0-9]]*/mark=<cleared>/' | +sed -e 's/labels=0x[[0-9]]/labels=0x?/'], [0], [dnl +icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x?000000001001020400000000 +tcp,orig=(src=fd07::1,dst=fd01::2,sport=<cleared>,dport=<cleared>),reply=(src=fd01::2,dst=fd07::1,sport=<cleared>,dport=<cleared>),zone=<cleared>,mark=<cleared>,labels=0x?000000001001020400000000,protoinfo=(state=<cleared>) ]) # Check entries in table 76 and 77 expires w/o traffic -- 2.45.2 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev