On 22/02/2021 19:40, svc.eng.git-m...@nutanix.com wrote: > From: karthik-kc <karthi...@nutanix.com>
Hi Karthik. Thanks for this. I have some comments below. Also, just to let you know, its needs a rebase and the UTs passed for me. > > Physical switches have the ability to specify "discard" or sometimes > "NULL interface" as a nexthop for routes. This can be used to prevent > routing loops in the network. Add a similar configuration for ovn > where nexthop accepts the string "discard". When the nexthop is discard > the action in the routing table will be to drop the packets. > > Signed-off-by: Karthik Chandrashekar <karthi...@nutanix.com> > --- > northd/ovn-northd.c | 126 +++++++++++++++++++++++------------------- > ovn-nb.xml | 4 +- > tests/ovn-nbctl.at | 13 +++++ > tests/ovn.at | 93 +++++++++++++++++++++++++++++++ > utilities/ovn-nbctl.c | 50 ++++++++++++----- > 5 files changed, 215 insertions(+), 71 deletions(-) > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > index 39d798782..18d0e0b43 100644 > --- a/northd/ovn-northd.c > +++ b/northd/ovn-northd.c > @@ -7749,6 +7749,7 @@ struct parsed_route { > uint32_t hash; > const struct nbrec_logical_router_static_route *route; > bool ecmp_symmetric_reply; > + bool is_discard_route; > }; > > static uint32_t > @@ -7768,20 +7769,23 @@ parsed_routes_add(struct ovs_list *routes, > /* Verify that the next hop is an IP address with an all-ones mask. */ > struct in6_addr nexthop; > unsigned int plen; > - if (!ip46_parse_cidr(route->nexthop, &nexthop, &plen)) { > - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); > - VLOG_WARN_RL(&rl, "bad 'nexthop' %s in static route" > - UUID_FMT, route->nexthop, > - UUID_ARGS(&route->header_.uuid)); > - return NULL; > - } > - if ((IN6_IS_ADDR_V4MAPPED(&nexthop) && plen != 32) || > - (!IN6_IS_ADDR_V4MAPPED(&nexthop) && plen != 128)) { > - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); > - VLOG_WARN_RL(&rl, "bad next hop mask %s in static route" > - UUID_FMT, route->nexthop, > - UUID_ARGS(&route->header_.uuid)); > - return NULL; > + bool is_discard_route = !strcmp(route->nexthop, "discard"); > + if (!is_discard_route) { > + if (!ip46_parse_cidr(route->nexthop, &nexthop, &plen)) { > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); > + VLOG_WARN_RL(&rl, "bad 'nexthop' %s in static route" > + UUID_FMT, route->nexthop, > + UUID_ARGS(&route->header_.uuid)); > + return NULL; > + } > + if ((IN6_IS_ADDR_V4MAPPED(&nexthop) && plen != 32) || > + (!IN6_IS_ADDR_V4MAPPED(&nexthop) && plen != 128)) { > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); > + VLOG_WARN_RL(&rl, "bad next hop mask %s in static route" > + UUID_FMT, route->nexthop, > + UUID_ARGS(&route->header_.uuid)); > + return NULL; > + } > } > > /* Parse ip_prefix */ > @@ -7795,13 +7799,15 @@ parsed_routes_add(struct ovs_list *routes, > } > > /* Verify that ip_prefix and nexthop have same address familiy. */ > - if (IN6_IS_ADDR_V4MAPPED(&prefix) != IN6_IS_ADDR_V4MAPPED(&nexthop)) { > - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); > - VLOG_WARN_RL(&rl, "Address family doesn't match between 'ip_prefix' > %s" > - " and 'nexthop' %s in static route"UUID_FMT, > - route->ip_prefix, route->nexthop, > - UUID_ARGS(&route->header_.uuid)); > - return NULL; > + if (!is_discard_route) { > + if (IN6_IS_ADDR_V4MAPPED(&prefix) != IN6_IS_ADDR_V4MAPPED(&nexthop)) > { > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); > + VLOG_WARN_RL(&rl, "Address family doesn't match between > 'ip_prefix' %s" > + " and 'nexthop' %s in static route"UUID_FMT, > + route->ip_prefix, route->nexthop, > + UUID_ARGS(&route->header_.uuid)); > + return NULL; > + } > } > > const struct nbrec_bfd *nb_bt = route->bfd; > @@ -7832,6 +7838,7 @@ parsed_routes_add(struct ovs_list *routes, > pr->route = route; > pr->ecmp_symmetric_reply = smap_get_bool(&route->options, > "ecmp_symmetric_reply", false); > + pr->is_discard_route = is_discard_route; > ovs_list_insert(routes, &pr->list_node); > return pr; > } > @@ -8244,10 +8251,11 @@ build_ecmp_route_flow(struct hmap *lflows, struct > ovn_datapath *od, > } > > static void > -add_route(struct hmap *lflows, const struct ovn_port *op, > - const char *lrp_addr_s, const char *network_s, int plen, > - const char *gateway, bool is_src_route, > - const struct ovsdb_idl_row *stage_hint) > +add_route(struct hmap *lflows, struct ovn_datapath *od, > + const struct ovn_port *op, const char *lrp_addr_s, > + const char *network_s, int plen, const char *gateway, > + bool is_src_route, const struct ovsdb_idl_row *stage_hint, > + bool is_discard_route) > { > bool is_ipv4 = strchr(network_s, '.') ? true : false; > struct ds match = DS_EMPTY_INITIALIZER; > @@ -8266,30 +8274,34 @@ add_route(struct hmap *lflows, const struct ovn_port > *op, > &match, &priority); > > struct ds common_actions = DS_EMPTY_INITIALIZER; > - ds_put_format(&common_actions, REG_ECMP_GROUP_ID" = 0; %s = ", > - is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6); > - if (gateway) { > - ds_put_cstr(&common_actions, gateway); > - } else { > - ds_put_format(&common_actions, "ip%s.dst", is_ipv4 ? "4" : "6"); > - } > - ds_put_format(&common_actions, "; " > - "%s = %s; " > - "eth.src = %s; " > - "outport = %s; " > - "flags.loopback = 1; " > - "next;", > - is_ipv4 ? REG_SRC_IPV4 : REG_SRC_IPV6, > - lrp_addr_s, > - op->lrp_networks.ea_s, > - op->json_key); > struct ds actions = DS_EMPTY_INITIALIZER; > - ds_put_format(&actions, "ip.ttl--; %s", ds_cstr(&common_actions)); > + if (is_discard_route) { > + ds_put_format(&actions, "drop;"); > + } else { > + ds_put_format(&common_actions, REG_ECMP_GROUP_ID" = 0; %s = ", > + is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6); > + if (gateway) { > + ds_put_cstr(&common_actions, gateway); > + } else { > + ds_put_format(&common_actions, "ip%s.dst", is_ipv4 ? "4" : "6"); > + } > + ds_put_format(&common_actions, "; " > + "%s = %s; " > + "eth.src = %s; " > + "outport = %s; " > + "flags.loopback = 1; " > + "next;", > + is_ipv4 ? REG_SRC_IPV4 : REG_SRC_IPV6, > + lrp_addr_s, > + op->lrp_networks.ea_s, > + op->json_key); > + ds_put_format(&actions, "ip.ttl--; %s", ds_cstr(&common_actions)); > + } > > - ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_ROUTING, priority, > + ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, priority, > ds_cstr(&match), ds_cstr(&actions), > stage_hint); > - if (op->has_bfd) { > + if (op && op->has_bfd) { > ds_put_format(&match, " && udp.dst == 3784"); > ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_ROUTING, > priority + 1, ds_cstr(&match), > @@ -8311,16 +8323,18 @@ build_static_route_flow(struct hmap *lflows, struct > ovn_datapath *od, > const struct nbrec_logical_router_static_route *route = route_->route; > > /* Find the outgoing port. */ > - if (!find_static_route_outport(od, ports, route, > - IN6_IS_ADDR_V4MAPPED(&route_->prefix), > - &lrp_addr_s, &out_port)) { > - return; > + if (!route_->is_discard_route) { > + if (!find_static_route_outport(od, ports, route, > + IN6_IS_ADDR_V4MAPPED(&route_->prefix), > + &lrp_addr_s, &out_port)) { > + return; > + } > } > > char *prefix_s = build_route_prefix_s(&route_->prefix, route_->plen); > - add_route(lflows, out_port, lrp_addr_s, prefix_s, route_->plen, > - route->nexthop, route_->is_src_route, > - &route->header_); > + add_route(lflows, route_->is_discard_route ? od : out_port->od, out_port, > + lrp_addr_s, prefix_s, route_->plen, route->nexthop, > + route_->is_src_route, &route->header_, > route_->is_discard_route); > > free(prefix_s); > } > @@ -9386,17 +9400,17 @@ build_ip_routing_flows_for_lrouter_port( > if (op->nbrp) { > > for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { > - add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s, > + add_route(lflows, op->od, op, > op->lrp_networks.ipv4_addrs[i].addr_s, > op->lrp_networks.ipv4_addrs[i].network_s, > op->lrp_networks.ipv4_addrs[i].plen, NULL, false, > - &op->nbrp->header_); > + &op->nbrp->header_, false); > } > > for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { > - add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s, > + add_route(lflows, op->od, op, > op->lrp_networks.ipv6_addrs[i].addr_s, > op->lrp_networks.ipv6_addrs[i].network_s, > op->lrp_networks.ipv6_addrs[i].plen, NULL, false, > - &op->nbrp->header_); > + &op->nbrp->header_, false); > } > } > } > diff --git a/ovn-nb.xml b/ovn-nb.xml > index a94918bb6..85efd70f2 100644 > --- a/ovn-nb.xml > +++ b/ovn-nb.xml > @@ -2636,7 +2636,9 @@ > <column name="nexthop"> > <p> > Nexthop IP address for this route. Nexthop IP address should be the > IP > - address of a connected router port or the IP address of a logical > port. > + address of a connected router port or the IP address of a logical > port > + or can be set to "discard" for dropping packets which match the given Looking at other examples in this document, I think you should highlight "discard" with <code> </code>. Also, you should update the "ovs-nbctl" man page. > + route. > </p> > </column> > > diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at > index 6d91aa4c5..f4b2a88c2 100644 > --- a/tests/ovn-nbctl.at > +++ b/tests/ovn-nbctl.at > @@ -1467,18 +1467,26 @@ AT_CHECK([ovn-nbctl lr-route-add lr0 10.0.0.111/24 > 11.0.0.1/24], [1], [], > AT_CHECK([ovn-nbctl lr-route-add lr0 2001:0db8:1::/64 > 2001:0db8:0:f103::1/64], [1], [], > [ovn-nbctl: bad IPv6 nexthop argument: 2001:0db8:0:f103::1/64 > ]) > +AT_CHECK([ovn-nbctl lr-route-add lr0 20.0.0.0/24 discard lp0], [1], [], > + [ovn-nbctl: outport is not valid for discard routes. > +]) > > AT_CHECK([ovn-nbctl --may-exist lr-route-add lr0 10.0.0.111/24 11.0.0.1]) > AT_CHECK([ovn-nbctl --policy=src-ip lr-route-add lr0 9.16.1.0/24 11.0.0.1]) > dnl Add a route with existed prefix but different policy (src-ip) > AT_CHECK([ovn-nbctl --policy=src-ip lr-route-add lr0 10.0.0.0/24 11.0.0.2]) > > +AT_CHECK([ovn-nbctl lr-route-add lr0 20.0.0.0/24 discard]) > +AT_CHECK([ovn-nbctl --policy=src-ip lr-route-add lr0 20.0.0.0/24 discard]) > + Depending on the outcome of the bfd/ecmp questions that I have in the "ovn-nbctl.c" section below, you may need to add some tests for them. > AT_CHECK([ovn-nbctl lr-route-list lr0], [0], [dnl > IPv4 Routes > 10.0.0.0/24 11.0.0.1 dst-ip > 10.0.1.0/24 11.0.1.1 dst-ip lp0 > + 20.0.0.0/24 discard dst-ip > 9.16.1.0/24 11.0.0.1 src-ip > 10.0.0.0/24 11.0.0.2 src-ip > + 20.0.0.0/24 discard src-ip > 0.0.0.0/0 192.168.0.1 dst-ip > ]) > > @@ -1487,11 +1495,16 @@ AT_CHECK([ovn-nbctl lr-route-list lr0], [0], [dnl > IPv4 Routes > 10.0.0.0/24 11.0.0.1 dst-ip lp1 > 10.0.1.0/24 11.0.1.1 dst-ip lp0 > + 20.0.0.0/24 discard dst-ip > 9.16.1.0/24 11.0.0.1 src-ip > 10.0.0.0/24 11.0.0.2 src-ip > + 20.0.0.0/24 discard src-ip > 0.0.0.0/0 192.168.0.1 dst-ip > ]) > > +AT_CHECK([ovn-nbctl --policy=src-ip lr-route-del lr0 20.0.0.0/24]) > +AT_CHECK([ovn-nbctl lr-route-del lr0 20.0.0.0/24 discard]) > + > dnl Delete non-existent prefix > AT_CHECK([ovn-nbctl lr-route-del lr0 10.0.2.1/24], [1], [], > [ovn-nbctl: no matching route: policy 'any', prefix '10.0.2.0/24', nexthop > 'any', output_port 'any'. > diff --git a/tests/ovn.at b/tests/ovn.at > index 55ea6d170..cdbf035fb 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -24049,3 +24049,96 @@ wait_column "true" nb:Logical_Switch_Port up > name=lsp1 > > OVN_CLEANUP([hv1]) > AT_CLEANUP > + > +AT_SETUP([ovn -- Static route with discard nexthop]) > +ovn_start > + > +# Logical network: > +# Logical Router lr1 has Logical Switch sw1 (10.0.0.0/24) connected to it. > sw1 has one > +# switch port sw1-ls1 (10.0.0.100) > + > +ovn-nbctl lr-add lr1 > + > +ovn-nbctl ls-add sw1 > + > +# Connect sw1 to lr1 > +ovn-nbctl lrp-add lr1 lr1-sw1 00:00:01:01:02:03 10.0.0.1/24 > +ovn-nbctl lsp-add sw1 sw1-lr1 > +ovn-nbctl lsp-set-type sw1-lr1 router > +ovn-nbctl lsp-set-addresses sw1-lr1 router > +ovn-nbctl lsp-set-options sw1-lr1 router-port=lr1-sw1 > + > +# Create logical port sw1-lp1 in sw1 > +ovn-nbctl lsp-add sw1 sw1-lp1 \ > +-- lsp-set-addresses sw1-lp1 "00:00:04:01:02:03 10.0.0.100" > + > +# Create one hypervisor and create OVS ports corresponding to logical ports. > +net_add n1 > + > +sim_add hv1 > +as hv1 > +ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.1 > + > +ovs-vsctl -- add-port br-int hv1-vif1 -- \ > + set interface hv1-vif1 external-ids:iface-id=sw1-lp1 \ > + options:tx_pcap=hv1/vif1-tx.pcap \ > + options:rxq_pcap=hv1/vif1-rx.pcap \ > + ofport-request=1 > + > +# Install static routes to drop traffic > +ovn-nbctl lr-route-add lr1 20.0.0.0/24 discard > + > +# Allow some time for ovn-controller to catch up. > +ovn-nbctl --wait=hv sync > + > +# Check logical flows for drop rule > +AT_CHECK([ovn-sbctl dump-flows | grep lr_in_ip_routing | grep "20.0.0.0/24" > | \ > + grep drop | wc -l], [0], [dnl > +1 > +]) > + > +# Send packet. > +packet="inport==\"sw1-lp1\" && eth.src==00:00:04:01:02:03 && > + eth.dst==00:00:01:01:02:03 && ip4 && ip.ttl==64 && > + ip4.src==10.0.0.100 && ip4.dst==20.0.0.200 && > + udp && udp.src==53 && udp.dst==4369" > + > +as hv1 ovs-appctl -t ovn-controller inject-pkt "$packet" > + > +# Check if packet hit the drop rule > +AT_CHECK([ovs-ofctl dump-flows br-int | grep "nw_dst=20.0.0.0/24" | \ > + grep "actions=drop" | grep "n_packets=1" | wc -l], [0], [dnl > +1 > +]) > + > +# Remove destination match and add discard route with source match > +ovn-nbctl lr-route-del lr1 20.0.0.0/24 > +ovn-nbctl --policy="src-ip" lr-route-add lr1 10.0.0.0/24 discard > + > +# Allow some time for ovn-controller to catch up. > +ovn-nbctl --wait=hv sync > + > +# Check logical flows for drop rule > +AT_CHECK([ovn-sbctl dump-flows | grep lr_in_ip_routing | grep "10.0.0.0/24" > | \ > + grep drop | wc -l], [0], [dnl > +1 > +]) > + > +# Send packet. > +packet="inport==\"sw1-lp1\" && eth.src==00:00:04:01:02:03 && > + eth.dst==00:00:01:01:02:03 && ip4 && ip.ttl==64 && > + ip4.src==10.0.0.100 && ip4.dst==20.0.0.200 && > + udp && udp.src==53 && udp.dst==4369" > + > +as hv1 ovs-appctl -t ovn-controller inject-pkt "$packet" > + > +# Check if packet hit the drop rule > +AT_CHECK([ovs-ofctl dump-flows br-int "nw_src=10.0.0.0/24" | \ > + grep "actions=drop" | grep "n_packets=1" | wc -l], [0], [dnl > +1 > +]) > + > +OVN_CLEANUP([hv1]) > + > +AT_CLEANUP > diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c > index 2c77f4ba7..cab645d83 100644 > --- a/utilities/ovn-nbctl.c > +++ b/utilities/ovn-nbctl.c > @@ -3972,6 +3972,7 @@ nbctl_lr_route_add(struct ctl_context *ctx) > > const char *policy = shash_find_data(&ctx->options, "--policy"); > bool is_src_route = false; > + bool is_discard_route = !strcmp(ctx->argv[3], "discard"); bit of a nit but this could probably be moved to just above where you use it below? > if (policy) { > if (!strcmp(policy, "src-ip")) { > is_src_route = true; > @@ -3992,13 +3993,17 @@ nbctl_lr_route_add(struct ctl_context *ctx) > return; > } > > - next_hop = v6_prefix > - ? normalize_ipv6_addr_str(ctx->argv[3]) > - : normalize_ipv4_addr_str(ctx->argv[3]); > - if (!next_hop) { > - ctl_error(ctx, "bad %s nexthop argument: %s", > - v6_prefix ? "IPv6" : "IPv4", ctx->argv[3]); > - goto cleanup; > + if (is_discard_route) { > + next_hop = xasprintf("discard"); > + } else { > + next_hop = v6_prefix > + ? normalize_ipv6_addr_str(ctx->argv[3]) > + : normalize_ipv4_addr_str(ctx->argv[3]); > + if (!next_hop) { > + ctl_error(ctx, "bad %s nexthop argument: %s", > + v6_prefix ? "IPv6" : "IPv4", ctx->argv[3]); > + goto cleanup; > + } > } > > struct shash_node *bfd = shash_find(&ctx->options, "--bfd"); > @@ -4047,6 +4052,10 @@ nbctl_lr_route_add(struct ctl_context *ctx) > nbrec_logical_router_static_route_set_ip_prefix(route, prefix); > nbrec_logical_router_static_route_set_nexthop(route, next_hop); > if (ctx->argc == 5) { > + if (is_discard_route) { > + ctl_error(ctx, "outport is not valid for discard > routes."); > + goto cleanup; > + } > nbrec_logical_router_static_route_set_output_port( > route, ctx->argv[4]); > } Is "discard" relevant for BFD? I would think not but not sure of the BFD implmentation but it the BFD peer address is getting setting to next_hop which could be "discard". > @@ -4076,6 +4085,10 @@ nbctl_lr_route_add(struct ctl_context *ctx) > nbrec_logical_router_static_route_set_ip_prefix(route, prefix); > nbrec_logical_router_static_route_set_nexthop(route, next_hop); > if (ctx->argc == 5) { > + if (is_discard_route) { > + ctl_error(ctx, "outport is not valid for discard routes."); > + goto cleanup; > + } You check for this at least twice. Maybe you can consolidate these checks in one check at the top of this function somewhere? > nbrec_logical_router_static_route_set_output_port(route, > ctx->argv[4]); You dont seem to disable this if ecmp is enabled? How does this work with ecmp? > } > if (policy) { > @@ -4146,10 +4159,14 @@ nbctl_lr_route_del(struct ctl_context *ctx) > > char *nexthop = NULL; > if (ctx->argc >= 4) { > - nexthop = normalize_prefix_str(ctx->argv[3]); > - if (!nexthop) { > - ctl_error(ctx, "bad nexthop argument: %s", ctx->argv[3]); > - return; > + if (!strcmp(ctx->argv[3], "discard")) { > + nexthop = xasprintf("discard"); > + } else { > + nexthop = normalize_prefix_str(ctx->argv[3]); > + if (!nexthop) { > + ctl_error(ctx, "bad nexthop argument: %s", ctx->argv[3]); > + return; > + } > } > } > > @@ -4189,9 +4206,12 @@ nbctl_lr_route_del(struct ctl_context *ctx) > } > > /* Compare nexthop, if specified. */ > + bool is_discard_route = !strcmp(lr->static_routes[i]->nexthop, > + "discard"); bit of a nit but i guess we dont need to do this strcmp() unless 'nexthop' is specified. > if (nexthop) { > - char *rt_nexthop = > - normalize_prefix_str(lr->static_routes[i]->nexthop); > + char *rt_nexthop = is_discard_route > + ? xasprintf("discard") > + : normalize_prefix_str(lr->static_routes[i]->nexthop); > if (!rt_nexthop) { > /* Ignore existing nexthop we couldn't parse. */ > continue; > @@ -5579,7 +5599,9 @@ print_route(const struct > nbrec_logical_router_static_route *route, > { > > char *prefix = normalize_prefix_str(route->ip_prefix); > - char *next_hop = normalize_prefix_str(route->nexthop); > + char *next_hop = !strcmp(route->nexthop, "discard") > + ? xasprintf("discard") > + : normalize_prefix_str(route->nexthop); > ds_put_format(s, "%25s %25s", prefix, next_hop); > free(prefix); > free(next_hop); > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev