From: Numan Siddique <nusid...@redhat.com> OVN has the actions - put_arp and put_nd to learn the mac bindings from the ARP/ND packets. These actions update the Southbound MAC_Binding table. These actions translate to controller actions. Whenever the pinctrl thread receives such packets, it wakes up the main ovn-controller thread. If the MAC_Binding table is already up to date, this results in unnecessary CPU cycles. There are some security implications as well. A rogue VM can flood broadcast ARP request/reply packets and this could cause DoS issues. A physical switch may send periodic GARPs and these packets hit ovn-controllers.
This patch solves these problems by learning the mac bindings only if required. There is no need to apply the put_arp/put_nd action if the Southbound MAC_Binding row is up to date. New actions - lookup_arp and lookup_nd are added which look up the IP, MAC pair in the mac_binding table and store the result in a register: 1 if the lookup is successful, 0 otherwise. ovn-northd adds 2 new stages - LOOKUP_NEIGHBOR and LEARN_NEIGHBOR before IP_INPUT in the router ingress pipeline. The LOOKUP_NEIGHBOR stage adds flows to do the lookup in the mac_binding table and the LEARN_NEIGHBOR stage adds flows to learn the neighbors only if required. The lflow module of ovn-controller adds OF flows in table 67 (OFTABLE_MAC_LOOKUP) for each mac_binding entry with the match reg0 = ip && eth.src = mac with the action - load:1->reg10[6]. E.g.: table=67, priority=100,arp,reg0=0xaca8006f,reg14=0x3,metadata=0x3,dl_src=00:44:00:00:00:04 actions=load:1->NXM_NX_REG10[6] This patch should also address the issue reported in 'Reported-at' Reported-at: https://bugzilla.redhat.com/1729846 Reported-by: Haidong Li <ha...@redhat.com> CC: Han ZHou <hzh...@ebay.com> CC: Dumitru Ceara <dce...@redhat.com> Tested-by: Dumitru Ceara <dce...@redhat.com> Signed-off-by: Numan Siddique <nusid...@redhat.com> --- v2 -> v3 ======== * Addressed review comments from Han. v1 -> v2 ======== * Addressed review comments from Han - Storing the result of lookup_arp/lookup_nd in a register. 
controller/lflow.c | 37 ++++- controller/lflow.h | 1 + include/ovn/actions.h | 13 ++ include/ovn/logical-fields.h | 4 + lib/actions.c | 114 ++++++++++++++ northd/ovn-northd.8.xml | 212 ++++++++++++++++--------- northd/ovn-northd.c | 205 ++++++++++++++----------- ovn-architecture.7.xml | 18 +++ ovn-sb.xml | 57 +++++++ tests/ovn.at | 290 ++++++++++++++++++++++++++++++++++- tests/test-ovn.c | 1 + utilities/ovn-trace.c | 69 +++++++++ 12 files changed, 844 insertions(+), 177 deletions(-) diff --git a/controller/lflow.c b/controller/lflow.c index d0335a83a..e3ed20cd4 100644 --- a/controller/lflow.c +++ b/controller/lflow.c @@ -687,6 +687,7 @@ consider_logical_flow( .egress_ptable = OFTABLE_LOG_EGRESS_PIPELINE, .output_ptable = output_ptable, .mac_bind_ptable = OFTABLE_MAC_BINDING, + .mac_lookup_ptable = OFTABLE_MAC_LOOKUP, }; ovnacts_encode(ovnacts.data, ovnacts.size, &ep, &ofpacts); ovnacts_free(ovnacts.data, ovnacts.size); @@ -777,7 +778,9 @@ consider_neighbor_flow(struct ovsdb_idl_index *sbrec_port_binding_by_name, return; } - struct match match = MATCH_CATCHALL_INITIALIZER; + struct match get_arp_match = MATCH_CATCHALL_INITIALIZER; + struct match lookup_arp_match = MATCH_CATCHALL_INITIALIZER; + if (strchr(b->ip, '.')) { ovs_be32 ip; if (!ip_parse(b->ip, &ip)) { @@ -785,7 +788,9 @@ consider_neighbor_flow(struct ovsdb_idl_index *sbrec_port_binding_by_name, VLOG_WARN_RL(&rl, "bad 'ip' %s", b->ip); return; } - match_set_reg(&match, 0, ntohl(ip)); + match_set_reg(&get_arp_match, 0, ntohl(ip)); + match_set_reg(&lookup_arp_match, 0, ntohl(ip)); + match_set_dl_type(&lookup_arp_match, htons(ETH_TYPE_ARP)); } else { struct in6_addr ip6; if (!ipv6_parse(b->ip, &ip6)) { @@ -795,17 +800,35 @@ consider_neighbor_flow(struct ovsdb_idl_index *sbrec_port_binding_by_name, } ovs_be128 value; memcpy(&value, &ip6, sizeof(value)); - match_set_xxreg(&match, 0, ntoh128(value)); + match_set_xxreg(&get_arp_match, 0, ntoh128(value)); + + match_set_xxreg(&lookup_arp_match, 0, ntoh128(value)); + 
match_set_dl_type(&lookup_arp_match, htons(ETH_TYPE_IPV6)); + match_set_nw_proto(&lookup_arp_match, 58); + match_set_icmp_code(&lookup_arp_match, 0); } - match_set_metadata(&match, htonll(pb->datapath->tunnel_key)); - match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, pb->tunnel_key); + match_set_metadata(&get_arp_match, htonll(pb->datapath->tunnel_key)); + match_set_reg(&get_arp_match, MFF_LOG_OUTPORT - MFF_REG0, pb->tunnel_key); + + match_set_metadata(&lookup_arp_match, htonll(pb->datapath->tunnel_key)); + match_set_reg(&lookup_arp_match, MFF_LOG_INPORT - MFF_REG0, + pb->tunnel_key); uint64_t stub[1024 / 8]; struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(stub); put_load(mac.ea, sizeof mac.ea, MFF_ETH_DST, 0, 48, &ofpacts); - ofctrl_add_flow(flow_table, OFTABLE_MAC_BINDING, 100, 0, &match, &ofpacts, - &b->header_.uuid); + ofctrl_add_flow(flow_table, OFTABLE_MAC_BINDING, 100, 0, &get_arp_match, + &ofpacts, &b->header_.uuid); + + ofpbuf_clear(&ofpacts); + uint8_t value = 1; + put_load(&value, sizeof value, MFF_LOG_FLAGS, MLF_LOOKUP_MAC_BIT, 1, + &ofpacts); + match_set_dl_src(&lookup_arp_match, mac); + ofctrl_add_flow(flow_table, OFTABLE_MAC_LOOKUP, 100, 0, &lookup_arp_match, + &ofpacts, &b->header_.uuid); + ofpbuf_uninit(&ofpacts); } diff --git a/controller/lflow.h b/controller/lflow.h index 54da00b49..0572668a9 100644 --- a/controller/lflow.h +++ b/controller/lflow.h @@ -65,6 +65,7 @@ struct uuid; #define OFTABLE_SAVE_INPORT 64 #define OFTABLE_LOG_TO_PHY 65 #define OFTABLE_MAC_BINDING 66 +#define OFTABLE_MAC_LOOKUP 67 /* The number of tables for the ingress and egress pipelines. 
*/ #define LOG_PIPELINE_LEN 24 diff --git a/include/ovn/actions.h b/include/ovn/actions.h index 145f27f25..4e2f4d28d 100644 --- a/include/ovn/actions.h +++ b/include/ovn/actions.h @@ -73,8 +73,10 @@ struct ovn_extend_table; OVNACT(ND_NA_ROUTER, ovnact_nest) \ OVNACT(GET_ARP, ovnact_get_mac_bind) \ OVNACT(PUT_ARP, ovnact_put_mac_bind) \ + OVNACT(LOOKUP_ARP, ovnact_lookup_mac_bind) \ OVNACT(GET_ND, ovnact_get_mac_bind) \ OVNACT(PUT_ND, ovnact_put_mac_bind) \ + OVNACT(LOOKUP_ND, ovnact_lookup_mac_bind) \ OVNACT(PUT_DHCPV4_OPTS, ovnact_put_opts) \ OVNACT(PUT_DHCPV6_OPTS, ovnact_put_opts) \ OVNACT(SET_QUEUE, ovnact_set_queue) \ @@ -266,6 +268,15 @@ struct ovnact_put_mac_bind { struct expr_field mac; /* 48-bit Ethernet address. */ }; +/* OVNACT_LOOKUP_ARP, OVNACT_LOOKUP_ND. */ +struct ovnact_lookup_mac_bind { + struct ovnact ovnact; + struct expr_field dst; /* 1-bit destination field. */ + struct expr_field port; /* Logical port name. */ + struct expr_field ip; /* 32-bit or 128-bit IP address. */ + struct expr_field mac; /* 48-bit Ethernet address. */ +}; + struct ovnact_gen_option { const struct gen_opts_map *option; struct expr_constant_set value; @@ -628,6 +639,8 @@ struct ovnact_encode_params { uint8_t output_ptable; /* OpenFlow table for 'output' to resubmit. */ uint8_t mac_bind_ptable; /* OpenFlow table for 'get_arp'/'get_nd' to resubmit. */ + uint8_t mac_lookup_ptable; /* OpenFlow table for + 'lookup_arp'/'lookup_nd' to resubmit. 
*/ }; void ovnacts_encode(const struct ovnact[], size_t ovnacts_len, diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h index 9bac8e027..9b7c34fb7 100644 --- a/include/ovn/logical-fields.h +++ b/include/ovn/logical-fields.h @@ -56,6 +56,7 @@ enum mff_log_flags_bits { MLF_FORCE_SNAT_FOR_LB_BIT = 3, MLF_LOCAL_ONLY_BIT = 4, MLF_NESTED_CONTAINER_BIT = 5, + MLF_LOOKUP_MAC_BIT = 6, }; /* MFF_LOG_FLAGS_REG flag assignments */ @@ -84,6 +85,9 @@ enum mff_log_flags { /* Indicate that a packet was received from a nested container. */ MLF_NESTED_CONTAINER = (1 << MLF_NESTED_CONTAINER_BIT), + + /* Indicate that the lookup in the mac binding table was successful. */ + MLF_LOOKUP_MAC = (1 << MLF_LOOKUP_MAC_BIT), }; /* OVN logical fields diff --git a/lib/actions.c b/lib/actions.c index 6a5907e1b..c8c9cc5fd 100644 --- a/lib/actions.c +++ b/lib/actions.c @@ -1607,6 +1607,112 @@ ovnact_put_mac_bind_free(struct ovnact_put_mac_bind *put_mac OVS_UNUSED) { } +static void format_lookup_mac(const struct ovnact_lookup_mac_bind *lookup_mac, + struct ds *s, const char *name) +{ + expr_field_format(&lookup_mac->dst, s); + ds_put_format(s, " = %s(", name); + expr_field_format(&lookup_mac->port, s); + ds_put_cstr(s, ", "); + expr_field_format(&lookup_mac->ip, s); + ds_put_cstr(s, ", "); + expr_field_format(&lookup_mac->mac, s); + ds_put_cstr(s, ");"); +} + +static void +format_LOOKUP_ARP(const struct ovnact_lookup_mac_bind *lookup_mac, + struct ds *s) +{ + format_lookup_mac(lookup_mac, s, "lookup_arp"); +} + +static void +format_LOOKUP_ND(const struct ovnact_lookup_mac_bind *lookup_mac, + struct ds *s) +{ + format_lookup_mac(lookup_mac, s, "lookup_nd"); +} + +static void +encode_lookup_mac(const struct ovnact_lookup_mac_bind *lookup_mac, + enum mf_field_id ip_field, + const struct ovnact_encode_params *ep, + struct ofpbuf *ofpacts) +{ + const struct arg args[] = { + { expr_resolve_field(&lookup_mac->port), MFF_LOG_INPORT }, + { expr_resolve_field(&lookup_mac->ip), ip_field 
}, + { expr_resolve_field(&lookup_mac->mac), MFF_ETH_SRC}, + }; + + encode_setup_args(args, ARRAY_SIZE(args), ofpacts); + + struct mf_subfield dst = expr_resolve_field(&lookup_mac->dst); + ovs_assert(dst.field); + + put_load(0, MFF_LOG_FLAGS, MLF_LOOKUP_MAC_BIT, 1, ofpacts); + emit_resubmit(ofpacts, ep->mac_lookup_ptable); + + struct ofpact_reg_move *orm = ofpact_put_REG_MOVE(ofpacts); + orm->dst = dst; + orm->src.field = mf_from_id(MFF_LOG_FLAGS); + orm->src.ofs = MLF_LOOKUP_MAC_BIT; + orm->src.n_bits = 1; + + encode_restore_args(args, ARRAY_SIZE(args), ofpacts); +} + +static void +encode_LOOKUP_ARP(const struct ovnact_lookup_mac_bind *lookup_mac, + const struct ovnact_encode_params *ep, + struct ofpbuf *ofpacts) +{ + encode_lookup_mac(lookup_mac, MFF_REG0, ep, ofpacts); +} + +static void +encode_LOOKUP_ND(const struct ovnact_lookup_mac_bind *lookup_mac, + const struct ovnact_encode_params *ep, + struct ofpbuf *ofpacts) +{ + encode_lookup_mac(lookup_mac, MFF_XXREG0, ep, ofpacts); +} + +static void +parse_lookup_mac_bind(struct action_context *ctx, + const struct expr_field *dst, + int width, + struct ovnact_lookup_mac_bind *lookup_mac) +{ + /* Validate that the destination is a 1-bit, modifiable field. */ + char *error = expr_type_check(dst, 1, true); + if (error) { + lexer_error(ctx->lexer, "%s", error); + free(error); + return; + } + + lexer_get(ctx->lexer); /* Skip lookup_arp/lookup_nd. */ + lexer_get(ctx->lexer); /* Skip '('. 
* */ + + action_parse_field(ctx, 0, false, &lookup_mac->port); + lexer_force_match(ctx->lexer, LEX_T_COMMA); + action_parse_field(ctx, width, false, &lookup_mac->ip); + lexer_force_match(ctx->lexer, LEX_T_COMMA); + action_parse_field(ctx, 48, false, &lookup_mac->mac); + lexer_force_match(ctx->lexer, LEX_T_RPAREN); + lookup_mac->dst = *dst; +} + +static void +ovnact_lookup_mac_bind_free( + struct ovnact_lookup_mac_bind *lookup_mac OVS_UNUSED) +{ + +} + + static void parse_gen_opt(struct action_context *ctx, struct ovnact_gen_option *o, const struct hmap *gen_opts, const char *opts_type) @@ -2722,6 +2828,14 @@ parse_set_action(struct action_context *ctx) && lexer_lookahead(ctx->lexer) == LEX_T_LPAREN) { parse_check_pkt_larger(ctx, &lhs, ovnact_put_CHECK_PKT_LARGER(ctx->ovnacts)); + } else if (!strcmp(ctx->lexer->token.s, "lookup_arp") + && lexer_lookahead(ctx->lexer) == LEX_T_LPAREN) { + parse_lookup_mac_bind(ctx, &lhs, 32, + ovnact_put_LOOKUP_ARP(ctx->ovnacts)); + } else if (!strcmp(ctx->lexer->token.s, "lookup_nd") + && lexer_lookahead(ctx->lexer) == LEX_T_LPAREN) { + parse_lookup_mac_bind(ctx, &lhs, 128, + ovnact_put_LOOKUP_ND(ctx->ovnacts)); } else { parse_assignment_action(ctx, false, &lhs); } diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index 0f4f1c112..8f65798f4 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -1218,7 +1218,126 @@ output; Other packets are implicitly dropped. </p> - <h3>Ingress Table 1: IP Input</h3> + <h3>Ingress Table 1: Neighbor lookup</h3> + + <p> + For ARP and IPv6 Neighbor Discovery packets, this table looks into the + <ref db="OVN_Southbound" table="MAC_Binding"/> records to determine + if OVN needs to learn the mac bindings. 
Following flows are added: + </p> + + <ul> + <li> + <p> + For each router port <var>P</var> that owns IP address <var>A</var>, + which belongs to subnet <var>S</var> with prefix length <var>L</var>, + a priority-100 flow is added which matches + <code>inport == <var>P</var> && + arp.spa == <var>S</var>/<var>L</var> && arp.op == 1</code> + (ARP request) with the + following actions: + </p> + + <pre> +reg9[4] = lookup_arp(inport, arp.spa, arp.sha); +next; + </pre> + + <p> + If the logical router port <var>P</var> is a distributed gateway + router port, additional match + <code>is_chassis_resident(cr-<var>P</var>)</code> is added so that + the resident gateway chassis handles the neighbor lookup. + </p> + </li> + + <li> + <p> + A priority-100 flow which matches on ARP reply packets and applies + the actions: + </p> + + <pre> +reg9[4] = lookup_arp(inport, arp.spa, arp.sha); +next; + </pre> + </li> + + <li> + <p> + A priority-100 flow which matches on IPv6 Neighbor Discovery + advertisement packet and applies the actions: + </p> + + <pre> +reg9[4] = lookup_nd(inport, nd.target, nd.tll); +next; + </pre> + </li> + + <li> + <p> + A priority-100 flow which matches on IPv6 Neighbor Discovery + solicitation packet and applies the actions: + </p> + + <pre> +reg9[4] = lookup_nd(inport, ip6.src, nd.sll); +next; + </pre> + </li> + + <li> + A priority-0 fallback flow that matches all packets and applies + the action <code>reg9[5] = 1; next;</code> + advancing the packet to the next table. + </li> + </ul> + + <h3>Ingress Table 2: Neighbor learning</h3> + + <p> + This table adds flows to learn the mac bindings from the ARP and + IPv6 Neighbor Solicitation/Advertisement packets if ARP/ND lookup + failed in the previous table. + </p> + + <p> + reg9[4] will be <code>1</code> if the <code>lookup_arp/lookup_nd</code> + in the previous table was successful. + </p> + + <p> + reg9[5] will be <code>1</code> if there was no need to do the lookup. 
+ </p> + + <ul> + <li> + A priority-100 flow with the match + <code>reg9[4] == 1 || reg9[5] == 1</code> and advances the packet + to the next table as there is no need to learn the neighbor. + </li> + + <li> + A priority-90 flow with the match <code>arp</code> and + applies the action + <code>put_arp(inport, arp.spa, arp.sha); next;</code> + </li> + + <li> + A priority-90 flow with the match <code>nd_na</code> and + applies the action + <code>put_nd(inport, nd.target, nd.tll); next;</code> + </li> + + <li> + A priority-90 flow with the match <code>nd_ns</code> and + applies the action + <code>put_nd(inport, ip6.src, nd.sll); next;</code> + </li> + </ul> + + <h3>Ingress Table 3: IP Input</h3> <p> This table is the core of the logical router datapath functionality. It @@ -1315,8 +1434,7 @@ next; </p> <p> - These flows reply to ARP requests for the router's own IP address - and populates mac binding table of the logical router port. + These flows reply to ARP requests for the router's own IP address. The ARP requests are handled only if the requestor's IP belongs to the same subnets of the logical router port. For each router port <var>P</var> that owns IP address <var>A</var>, @@ -1329,7 +1447,6 @@ next; </p> <pre> -put_arp(inport, arp.spa, arp.sha); eth.dst = eth.src; eth.src = <var>E</var>; arp.op = 2; /* ARP reply. */ @@ -1365,17 +1482,6 @@ output; </p> </li> - <li> - <p> - These flows handles ARP requests not for router's own IP address. - They use the SPA and SHA to populate the logical router port's - mac binding table, with priority 80. The typical use case of - these flows are GARP requests handling. For the gateway port - on a distributed logical router, these flows are only programmed - on the gateway port instance on the <code>redirect-chassis</code>. - </p> - </li> - <li> <p> These flows reply to ARP requests for the virtual IP addresses @@ -1446,36 +1552,6 @@ arp.sha = <var>external_mac</var>; </ul> </li> - <li> - <p> - ARP reply handling. 
Following flows are added to handle ARP replies. - </p> - - <p> - For each distributed gateway logical router port a priority-92 flow - with match <code>inport == <var>P</var> && - is_chassis_resident(cr-<var>P</var>) && eth.bcast && - arp.op == 2 && arp.spa == <var>I</var></code> with the - action <code>put_arp(inport, arp.spa, arp.sha);</code> so that the - resident gateway chassis can learn the GARP reply, where - <var>P</var> is the distributed gateway router port name, - <var>I</var> is the logical router port's network address. - </p> - - <p> - For each distributed gateway logical router port a priority-92 flow - with match <code>inport == <var>P</var> && - !is_chassis_resident(cr-<var>P</var>) && eth.bcast && - arp.op == 2 && arp.spa == <var>I</var></code> with the action - <code>drop;</code> so that other chassis drop this packet. - </p> - - <p> - A priority-90 flow with match <code>arp.op == 2</code> has actions - <code>put_arp(inport, arp.spa, arp.sha);</code>. - </p> - </li> - <li> <p> Reply to IPv6 Neighbor Solicitations. These flows reply to @@ -1494,7 +1570,6 @@ arp.sha = <var>external_mac</var>; </p> <pre> -put_nd(inport, ip6.src, nd.sll); nd_na_router { eth.src = <var>E</var>; ip6.src = <var>A</var>; @@ -1516,7 +1591,6 @@ nd_na_router { </p> <pre> -put_nd(inport, ip6.src, nd.sll); nd_na { eth.src = <var>E</var>; ip6.src = <var>A</var>; @@ -1541,20 +1615,8 @@ nd_na { </li> <li> - IPv6 neighbor advertisement handling. This flow uses neighbor - advertisements to populate the logical router's mac binding - table. A priority-90 flow with match <code>nd_na</code> - has actions <code>put_nd(inport, nd.target, nd.tll);</code>. - </li> - - <li> - IPv6 neighbor solicitation for non-hosted addresses handling. - This flow uses neighbor solicitations to populate the logical - router's mac binding table (ones that were directed at the - logical router would have matched the priority-90 neighbor - solicitation flow already). 
A priority-80 flow with match - <code>nd_ns</code> has actions - <code>put_nd(inport, ip6.src, nd.sll);</code>. + Priority-85 flows which drops the ARP and IPv6 Neighbor Discovery + packets. </li> <li> @@ -1670,7 +1732,7 @@ icmp6 { </li> </ul> - <h3>Ingress Table 2: DEFRAG</h3> + <h3>Ingress Table 4: DEFRAG</h3> <p> This is to send packets to connection tracker for tracking and @@ -1728,7 +1790,7 @@ icmp6 { </li> </ul> - <p>Ingress Table 3: UNSNAT on Distributed Routers</p> + <p>Ingress Table 5: UNSNAT on Distributed Routers</p> <ul> <li> @@ -1767,7 +1829,7 @@ icmp6 { </li> </ul> - <h3>Ingress Table 4: DNAT</h3> + <h3>Ingress Table 6: DNAT</h3> <p> Packets enter the pipeline with destination IP address that needs to @@ -1775,7 +1837,7 @@ icmp6 { in the reverse direction needs to be unDNATed. </p> - <p>Ingress Table 4: Load balancing DNAT rules</p> + <p>Ingress Table 6: Load balancing DNAT rules</p> <p> Following load balancing DNAT flows are added for Gateway router or @@ -1846,7 +1908,7 @@ icmp6 { </li> </ul> - <p>Ingress Table 4: DNAT on Gateway Routers</p> + <p>Ingress Table 6: DNAT on Gateway Routers</p> <ul> <li> @@ -1872,7 +1934,7 @@ icmp6 { </li> </ul> - <p>Ingress Table 4: DNAT on Distributed Routers</p> + <p>Ingress Table 6: DNAT on Distributed Routers</p> <p> On distributed routers, the DNAT table only handles packets @@ -1919,7 +1981,7 @@ icmp6 { </li> </ul> - <h3>Ingress Table 5: IPv6 ND RA option processing</h3> + <h3>Ingress Table 7: IPv6 ND RA option processing</h3> <ul> <li> @@ -1949,7 +2011,7 @@ reg0[5] = put_nd_ra_opts(<var>options</var>);next; </li> </ul> - <h3>Ingress Table 6: IPv6 ND RA responder</h3> + <h3>Ingress Table 8: IPv6 ND RA responder</h3> <p> This table implements IPv6 ND RA responder for the IPv6 ND RA replies @@ -1994,7 +2056,7 @@ output; </li> </ul> - <h3>Ingress Table 7: IP Routing</h3> + <h3>Ingress Table 9: IP Routing</h3> <p> A packet that arrives at this table is an IP packet that should be @@ -2144,7 +2206,7 @@ next; </li> 
</ul> - <h3>Ingress Table 8: ARP/ND Resolution</h3> + <h3>Ingress Table 10: ARP/ND Resolution</h3> <p> Any packet that reaches this table is an IP packet whose next-hop @@ -2291,7 +2353,7 @@ next; </ul> - <h3>Ingress Table 9: Check packet length</h3> + <h3>Ingress Table 11: Check packet length</h3> <p> For distributed logical routers with distributed gateway port configured @@ -2321,7 +2383,7 @@ REGBIT_PKT_LARGER = check_pkt_larger(<var>L</var>); next; and advances to the next table. </p> - <h3>Ingress Table 10: Handle larger packets</h3> + <h3>Ingress Table 12: Handle larger packets</h3> <p> For distributed logical routers with distributed gateway port configured @@ -2370,7 +2432,7 @@ icmp4 { and advances to the next table. </p> - <h3>Ingress Table 11: Gateway Redirect</h3> + <h3>Ingress Table 13: Gateway Redirect</h3> <p> For distributed logical routers where one of the logical router @@ -2432,7 +2494,7 @@ icmp4 { </li> </ul> - <h3>Ingress Table 12: ARP Request</h3> + <h3>Ingress Table 14: ARP Request</h3> <p> In the common case where the Ethernet destination has been resolved, this diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index f393cebb8..2df95c150 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -144,20 +144,22 @@ enum ovn_stage { PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 9, "ls_out_port_sec_l2") \ \ /* Logical router ingress stages. 
*/ \ - PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \ - PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \ - PIPELINE_STAGE(ROUTER, IN, DEFRAG, 2, "lr_in_defrag") \ - PIPELINE_STAGE(ROUTER, IN, UNSNAT, 3, "lr_in_unsnat") \ - PIPELINE_STAGE(ROUTER, IN, DNAT, 4, "lr_in_dnat") \ - PIPELINE_STAGE(ROUTER, IN, ND_RA_OPTIONS, 5, "lr_in_nd_ra_options") \ - PIPELINE_STAGE(ROUTER, IN, ND_RA_RESPONSE, 6, "lr_in_nd_ra_response") \ - PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 7, "lr_in_ip_routing") \ - PIPELINE_STAGE(ROUTER, IN, POLICY, 8, "lr_in_policy") \ - PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 9, "lr_in_arp_resolve") \ - PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN , 10, "lr_in_chk_pkt_len") \ - PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 11,"lr_in_larger_pkts") \ - PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 12, "lr_in_gw_redirect") \ - PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 13, "lr_in_arp_request") \ + PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \ + PIPELINE_STAGE(ROUTER, IN, LOOKUP_NEIGHBOR, 1, "lr_in_lookup_neighbor") \ + PIPELINE_STAGE(ROUTER, IN, LEARN_NEIGHBOR, 2, "lr_in_learn_neighbor") \ + PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 3, "lr_in_ip_input") \ + PIPELINE_STAGE(ROUTER, IN, DEFRAG, 4, "lr_in_defrag") \ + PIPELINE_STAGE(ROUTER, IN, UNSNAT, 5, "lr_in_unsnat") \ + PIPELINE_STAGE(ROUTER, IN, DNAT, 6, "lr_in_dnat") \ + PIPELINE_STAGE(ROUTER, IN, ND_RA_OPTIONS, 7, "lr_in_nd_ra_options") \ + PIPELINE_STAGE(ROUTER, IN, ND_RA_RESPONSE, 8, "lr_in_nd_ra_response") \ + PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 9, "lr_in_ip_routing") \ + PIPELINE_STAGE(ROUTER, IN, POLICY, 10, "lr_in_policy") \ + PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 11, "lr_in_arp_resolve") \ + PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN , 12, "lr_in_chk_pkt_len") \ + PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 13,"lr_in_larger_pkts") \ + PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 14, "lr_in_gw_redirect") \ + PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 15, "lr_in_arp_request") \ \ /* Logical router 
egress stages. */ \ PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \ @@ -196,6 +198,8 @@ enum ovn_stage { #define REGBIT_DISTRIBUTED_NAT "reg9[2]" /* Register to store the result of check_pkt_larger action. */ #define REGBIT_PKT_LARGER "reg9[3]" +#define REGBIT_LOOKUP_NEIGHBOR_RESULT "reg9[4]" +#define REGBIT_SKIP_LOOKUP_NEIGHBOR "reg9[5]" /* Returns an "enum ovn_stage" built from the arguments. */ static enum ovn_stage @@ -6375,7 +6379,96 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, ds_cstr(&match), "next;"); } - /* Logical router ingress table 1: IP Input. */ + /* Logical router ingress table 1: LOOKUP_NEIGHBOR and + * table 2: LEARN_NEIGHBOR. */ + HMAP_FOR_EACH (od, key_node, datapaths) { + if (!od->nbr) { + continue; + } + + /* Learn MAC bindings from ARP/IPv6 ND. + * + * For ARP packets, table LOOKUP_NEIGHBOR does a lookup for the + * (arp.spa, arp.sha) in the mac binding table using the 'lookup_arp' + * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_RESULT bit. + * + * For IPv6 ND NA packets, table LOOKUP_NEIGHBOR does a lookup + * for the (nd.target, nd.tll) in the mac binding table using the + * 'lookup_nd' action and stores the result in + * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. + * + * For IPv6 ND NS packets, table LOOKUP_NEIGHBOR does a lookup + * for the (ip6.src, nd.sll) in the mac binding table using the + * 'lookup_nd' action and stores the result in + * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. + * + * Table LEARN_NEIGHBOR learns the mac-binding using the action + * - 'put_arp/put_nd' only if REGBIT_LOOKUP_NEIGHBOR_RESULT bit + * is not set. + * + * */ + + /* Flows for LOOKUP_NEIGHBOR. 
*/ + ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, + "arp.op == 2", + REGBIT_LOOKUP_NEIGHBOR_RESULT" = " + "lookup_arp(inport, arp.spa, arp.sha); next;"); + + ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_na", + REGBIT_LOOKUP_NEIGHBOR_RESULT" = " + "lookup_nd(inport, nd.target, nd.tll); next;"); + + ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_ns", + REGBIT_LOOKUP_NEIGHBOR_RESULT" = " + "lookup_nd(inport, ip6.src, nd.sll); next;"); + + /* For other packet types, we can skip neighbor learning. + * So set REGBIT_SKIP_LOOKUP_NEIGHBOR to 1. */ + ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 0, "1", + REGBIT_SKIP_LOOKUP_NEIGHBOR" = 1; next;"); + + /* Flows for LEARN_NEIGHBOR. */ + /* Skip Neighbor learning if not required. */ + ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 100, + REGBIT_SKIP_LOOKUP_NEIGHBOR" == 1 || " + REGBIT_LOOKUP_NEIGHBOR_RESULT" == 1", "next;"); + + ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90, + "arp", "put_arp(inport, arp.spa, arp.sha); next;"); + + ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90, + "nd_na", "put_nd(inport, nd.target, nd.tll); next;"); + + ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90, + "nd_ns", "put_nd(inport, ip6.src, nd.sll); next;"); + } + + HMAP_FOR_EACH (op, key_node, ports) { + if (!op->nbrp) { + continue; + } + + /* Check if we need to learn mac-binding from ARP requests. 
*/ + for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { + ds_clear(&match); + ds_put_format(&match, + "inport == %s && arp.spa == %s/%u && arp.op == 1", + op->json_key, + op->lrp_networks.ipv4_addrs[i].network_s, + op->lrp_networks.ipv4_addrs[i].plen); + if (op->od->l3dgw_port && op == op->od->l3dgw_port + && op->od->l3redirect_port) { + ds_put_format(&match, " && is_chassis_resident(%s)", + op->od->l3redirect_port->json_key); + } + ovn_lflow_add(lflows, op->od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, + ds_cstr(&match), + REGBIT_LOOKUP_NEIGHBOR_RESULT" = " + "lookup_arp(inport, arp.spa, arp.sha); next;"); + } + } + + /* Logical router ingress table 3: IP Input. */ HMAP_FOR_EACH (od, key_node, datapaths) { if (!od->nbr) { continue; @@ -6397,10 +6490,13 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 95, "ip4.mcast", od->mcast_info.rtr.relay ? "next;" : "drop;"); - /* ARP reply handling. Use ARP replies to populate the logical - * router's ARP table. */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2", - "put_arp(inport, arp.spa, arp.sha);"); + /* Drop ARP packets (priority 85). ARP request packets for router's own + * IPs are handled with priority-90 flows. + * Drop IPv6 ND packets (priority 85). ND NA packets for router's own + * IPs are handled with priority-90 flows. + */ + ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 85, + "arp || nd", "drop;"); /* Drop Ethernet local broadcast. By definition this traffic should * not be forwarded.*/ @@ -6413,23 +6509,12 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, ds_cstr(&match), "drop;"); - /* ND advertisement handling. Use advertisements to populate - * the logical router's ARP/ND table. 
*/ - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "nd_na", - "put_nd(inport, nd.target, nd.tll);"); - - /* Lean from neighbor solicitations that were not directed at - * us. (A priority-90 flow will respond to requests to us and - * learn the sender's mac address. */ - ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 80, "nd_ns", - "put_nd(inport, ip6.src, nd.sll);"); - /* Pass other traffic not already handled to the next table for * routing. */ ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;"); } - /* Logical router ingress table 1: IP Input for IPv4. */ + /* Logical router ingress table 3: IP Input for IPv4. */ HMAP_FOR_EACH (op, key_node, ports) { if (!op->nbrp) { continue; @@ -6539,7 +6624,6 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, ds_clear(&actions); ds_put_format(&actions, - "put_arp(inport, arp.spa, arp.sha); " "eth.dst = eth.src; " "eth.src = %s; " "arp.op = 2; /* ARP reply */ " @@ -6558,62 +6642,6 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, ds_cstr(&match), ds_cstr(&actions)); } - /* Learn from ARP requests that were not directed at us. A typical - * use case is GARP request handling. (A priority-90 flow will - * respond to request to us and learn the sender's mac address.) */ - for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { - ds_clear(&match); - ds_put_format(&match, - "inport == %s && arp.spa == %s/%u && arp.op == 1", - op->json_key, - op->lrp_networks.ipv4_addrs[i].network_s, - op->lrp_networks.ipv4_addrs[i].plen); - if (op->od->l3dgw_port && op == op->od->l3dgw_port - && op->od->l3redirect_port) { - ds_put_format(&match, " && is_chassis_resident(%s)", - op->od->l3redirect_port->json_key); - } - ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80, - ds_cstr(&match), - "put_arp(inport, arp.spa, arp.sha);"); - - } - - /* Handle GARP reply packets received on a distributed router gateway - * port. GARP reply broadcast packets could be sent by external - * switches. 
We don't want them to be handled by all the - * ovn-controllers if they receive it. So add a priority-92 flow to - * apply the put_arp action on a redirect chassis and drop it on - * other chassis. - * Note that we are already adding a priority-90 logical flow in the - * table S_ROUTER_IN_IP_INPUT to apply the put_arp action if - * arp.op == 2. - * */ - if (op->od->l3dgw_port && op == op->od->l3dgw_port - && op->od->l3redirect_port) { - for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { - ds_clear(&match); - ds_put_format(&match, - "inport == %s && is_chassis_resident(%s) && " - "eth.bcast && arp.op == 2 && arp.spa == %s/%u", - op->json_key, op->od->l3redirect_port->json_key, - op->lrp_networks.ipv4_addrs[i].network_s, - op->lrp_networks.ipv4_addrs[i].plen); - ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 92, - ds_cstr(&match), - "put_arp(inport, arp.spa, arp.sha);"); - ds_clear(&match); - ds_put_format(&match, - "inport == %s && !is_chassis_resident(%s) && " - "eth.bcast && arp.op == 2 && arp.spa == %s/%u", - op->json_key, op->od->l3redirect_port->json_key, - op->lrp_networks.ipv4_addrs[i].network_s, - op->lrp_networks.ipv4_addrs[i].plen); - ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 92, - ds_cstr(&match), "drop;"); - } - } - /* A set to hold all load-balancer vips that need ARP responses. */ struct sset all_ips = SSET_INITIALIZER(&all_ips); int addr_family; @@ -6924,7 +6952,6 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, ds_clear(&actions); ds_put_format(&actions, - "put_nd(inport, ip6.src, nd.sll); " "nd_na_router { " "eth.src = %s; " "ip6.src = %s; " diff --git a/ovn-architecture.7.xml b/ovn-architecture.7.xml index 6115e84b5..c98db48d2 100644 --- a/ovn-architecture.7.xml +++ b/ovn-architecture.7.xml @@ -970,6 +970,24 @@ this temporary use.) 
</p> </dd> + + <dt><code><var>R</var> = lookup_arp(<var>P</var>, <var>A</var>, <var>M</var>);</code></dt> + <dt><code><var>R</var> = lookup_nd(<var>P</var>, <var>A</var>, <var>M</var>);</code></dt> + <dd> + <p> + Implemented by storing arguments into OpenFlow fields, then + resubmitting to table 67, which <code>ovn-controller</code> + populates with flows generated from the <code>MAC_Binding</code> + table in the OVN Southbound database. If there is a match in table + 67, then its actions set the logical flow flag <code>MLF_LOOKUP_MAC</code>. + </p> + + <p> + (The OpenFlow actions save and restore the OpenFlow fields used for + the arguments, so that the OVN actions do not have to be aware of + this temporary use.) + </p> + </dd> </dl> </li> diff --git a/ovn-sb.xml b/ovn-sb.xml index 477e7bc7a..e5fb51a9d 100644 --- a/ovn-sb.xml +++ b/ovn-sb.xml @@ -1397,6 +1397,35 @@ <p><b>Example:</b> <code>put_arp(inport, arp.spa, arp.sha);</code></p> </dd> + <dt> + <code><var>R</var> = lookup_arp(<var>P</var>, <var>A</var>, <var>M</var>);</code> + </dt> + + <dd> + <p> + <b>Parameters</b>: logical port string field <var>P</var>, 32-bit + IP address field <var>A</var>, 48-bit MAC address field + <var>M</var>. + </p> + + <p> + <b>Result</b>: stored to a 1-bit subfield <var>R</var>. + </p> + + <p> + Looks up <var>A</var> and <var>M</var> in <var>P</var>'s mac + binding table. If an entry is found, stores <code>1</code> in + the 1-bit subfield <var>R</var>, else 0. 
+ </p> + + <p> + <b>Example:</b> + <code> + reg0[0] = lookup_arp(inport, arp.spa, arp.sha); + </code> + </p> + </dd> + <dt><code>nd_ns { <var>action</var>; </code>...<code> };</code></dt> <dd> <p> @@ -1553,6 +1582,34 @@ <p><b>Example:</b> <code>put_nd(inport, nd.target, nd.tll);</code></p> </dd> + <dt><code><var>R</var> = lookup_nd(<var>P</var>, <var>A</var>, <var>M</var>);</code> + </dt> + + <dd> + <p> + <b>Parameters</b>: logical port string field <var>P</var>, 128-bit + IP address field <var>A</var>, 48-bit MAC address field + <var>M</var>. + </p> + + <p> + <b>Result</b>: stored to a 1-bit subfield <var>R</var>. + </p> + + <p> + Looks up <var>A</var> and <var>M</var> in <var>P</var>'s mac + binding table. If an entry is found, stores <code>1</code> in + the 1-bit subfield <var>R</var>, else 0. + </p> + + <p> + <b>Example:</b> + <code> + reg0[0] = lookup_nd(inport, ip6.src, eth.src); + </code> + </p> + </dd> + <dt> <code><var>R</var> = put_dhcp_opts(<var>D1</var> = <var>V1</var>, <var>D2</var> = <var>V2</var>, ..., <var>Dn</var> = <var>Vn</var>);</code> </dt> diff --git a/tests/ovn.at b/tests/ovn.at index 04898dd1f..c32a75c26 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -1143,6 +1143,33 @@ put_arp(inport, arp.spa, arp.sha); encodes as push:NXM_NX_REG0[],push:NXM_OF_ETH_SRC[],push:NXM_NX_ARP_SHA[],push:NXM_OF_ARP_SPA[],pop:NXM_NX_REG0[],pop:NXM_OF_ETH_SRC[],controller(userdata=00.00.00.01.00.00.00.00),pop:NXM_OF_ETH_SRC[],pop:NXM_NX_REG0[] has prereqs eth.type == 0x806 && eth.type == 0x806 +# lookup_arp +reg0[0] = lookup_arp(inport, ip4.dst, eth.src); + encodes as push:NXM_NX_REG0[],push:NXM_OF_IP_DST[],pop:NXM_NX_REG0[],set_field:0/0x40->reg10,resubmit(,67),move:NXM_NX_REG10[6]->NXM_NX_XXREG0[96],pop:NXM_NX_REG0[] + has prereqs eth.type == 0x800 +reg1[1] = lookup_arp(inport, arp.spa, arp.sha); + encodes as 
push:NXM_NX_REG0[],push:NXM_OF_ETH_SRC[],push:NXM_NX_ARP_SHA[],push:NXM_OF_ARP_SPA[],pop:NXM_NX_REG0[],pop:NXM_OF_ETH_SRC[],set_field:0/0x40->reg10,resubmit(,67),move:NXM_NX_REG10[6]->NXM_NX_XXREG0[65],pop:NXM_OF_ETH_SRC[],pop:NXM_NX_REG0[] + has prereqs eth.type == 0x806 && eth.type == 0x806 + +lookup_arp; + Syntax error at `lookup_arp' expecting action. +reg0[0] = lookup_arp; + Syntax error at `lookup_arp' expecting field name. +reg0[0] = lookup_arp(); + Syntax error at `)' expecting field name. +reg0[0] = lookup_arp(inport); + Syntax error at `)' expecting `,'. +reg0[0] = lookup_arp(inport ip4.dst); + Syntax error at `ip4.dst' expecting `,'. +reg0[0] = lookup_arp(inport, ip4.dst; + Syntax error at `;' expecting `,'. +reg0[0] = lookup_arp(inport, ip4.dst, eth.src; + Syntax error at `;' expecting `)'. +reg0[0] = lookup_arp(inport, eth.dst); + Cannot use 48-bit field eth.dst[0..47] where 32-bit field is required. +reg0[0] = lookup_arp(inport, ip4.src, ip4.dst); + Cannot use 32-bit field ip4.dst[0..31] where 48-bit field is required. + # put_dhcp_opts reg1[0] = put_dhcp_opts(offerip = 1.2.3.4, router = 10.0.0.1); encodes as controller(userdata=00.00.00.02.00.00.00.00.00.01.de.10.00.00.00.40.01.02.03.04.03.04.0a.00.00.01,pause) @@ -1243,6 +1270,35 @@ reg1[0] = put_dhcpv6_opts(ia_addr="ae70::4"); reg1[0] = put_dhcpv6_opts(ia_addr=ae70::4, domain_search=ae70::1); DHCPv6 option domain_search requires string value. 
+# lookup_nd +reg2[0] = lookup_nd(inport, ip6.dst, eth.src); + encodes as push:NXM_NX_XXREG0[],push:NXM_NX_IPV6_DST[],pop:NXM_NX_XXREG0[],set_field:0/0x40->reg10,resubmit(,67),move:NXM_NX_REG10[6]->NXM_NX_XXREG0[32],pop:NXM_NX_XXREG0[] + has prereqs eth.type == 0x86dd +reg3[0] = lookup_nd(inport, nd.target, nd.tll); + encodes as push:NXM_NX_XXREG0[],push:NXM_OF_ETH_SRC[],push:NXM_NX_ND_TLL[],push:NXM_NX_ND_TARGET[],pop:NXM_NX_XXREG0[],pop:NXM_OF_ETH_SRC[],set_field:0/0x40->reg10,resubmit(,67),move:NXM_NX_REG10[6]->NXM_NX_XXREG0[0],pop:NXM_OF_ETH_SRC[],pop:NXM_NX_XXREG0[] + has prereqs (icmp6.type == 0x87 || icmp6.type == 0x88) && eth.type == 0x86dd && ip.proto == 0x3a && (eth.type == 0x800 || eth.type == 0x86dd) && icmp6.code == 0 && eth.type == 0x86dd && ip.proto == 0x3a && (eth.type == 0x800 || eth.type == 0x86dd) && ip.ttl == 0xff && (eth.type == 0x800 || eth.type == 0x86dd) && icmp6.type == 0x88 && eth.type == 0x86dd && ip.proto == 0x3a && (eth.type == 0x800 || eth.type == 0x86dd) && icmp6.code == 0 && eth.type == 0x86dd && ip.proto == 0x3a && (eth.type == 0x800 || eth.type == 0x86dd) && ip.ttl == 0xff && (eth.type == 0x800 || eth.type == 0x86dd) + +lookup_nd; + Syntax error at `lookup_nd' expecting action. +reg0[0] = lookup_nd; + Syntax error at `lookup_nd' expecting field name. +reg0[0] = lookup_nd(); + Syntax error at `)' expecting field name. +reg0[0] = lookup_nd(inport); + Syntax error at `)' expecting `,'. +reg0[0] = lookup_nd(inport ip6.dst); + Syntax error at `ip6.dst' expecting `,'. +reg0[0] = lookup_nd(inport, ip6.dst; + Syntax error at `;' expecting `,'. +reg0[0] = lookup_nd(inport, ip6.dst, eth.src; + Syntax error at `;' expecting `)'. +reg0[0] = lookup_nd(inport, eth.dst); + Cannot use 48-bit field eth.dst[0..47] where 128-bit field is required. +reg0[0] = lookup_nd(inport, ip4.src, ip4.dst); + Cannot use 32-bit field ip4.src[0..31] where 128-bit field is required. 
+reg0[0] = lookup_nd(inport, ip6.src, ip6.dst); + Cannot use 128-bit field ip6.dst[0..127] where 48-bit field is required. + # set_queue set_queue(0); encodes as set_queue:0 @@ -14528,7 +14584,7 @@ ovn-sbctl dump-flows lr0 | grep lr_in_arp_resolve | grep "reg0 == 10.0.0.10" \ # Since the sw0-vir is not claimed by any chassis, eth.dst should be set to # zero if the ip4.dst is the virtual ip in the router pipeline. AT_CHECK([cat lflows.txt], [0], [dnl - table=9 (lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 00:00:00:00:00:00; next;) + table=11(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 00:00:00:00:00:00; next;) ]) ip_to_hex() { @@ -14564,7 +14620,7 @@ ovn-sbctl dump-flows lr0 | grep lr_in_arp_resolve | grep "reg0 == 10.0.0.10" \ # There should be an arp resolve flow to resolve the virtual_ip with the # sw0-p1's MAC. AT_CHECK([cat lflows.txt], [0], [dnl - table=9 (lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:03; next;) + table=11(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:03; next;) ]) # send the garp from sw0-p2 (in hv2). hv2 should claim sw0-vir @@ -14587,7 +14643,7 @@ ovn-sbctl dump-flows lr0 | grep lr_in_arp_resolve | grep "reg0 == 10.0.0.10" \ # There should be an arp resolve flow to resolve the virtual_ip with the # sw0-p2's MAC. AT_CHECK([cat lflows.txt], [0], [dnl - table=9 (lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:04; next;) + table=11(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:04; next;) ]) # Now send arp reply from sw0-p1. 
hv1 should claim sw0-vir @@ -14608,7 +14664,7 @@ ovn-sbctl dump-flows lr0 | grep lr_in_arp_resolve | grep "reg0 == 10.0.0.10" \ > lflows.txt AT_CHECK([cat lflows.txt], [0], [dnl - table=9 (lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:03; next;) + table=11(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:03; next;) ]) # Delete hv1-vif1 port. hv1 should release sw0-vir @@ -14626,7 +14682,7 @@ ovn-sbctl dump-flows lr0 | grep lr_in_arp_resolve | grep "reg0 == 10.0.0.10" \ > lflows.txt AT_CHECK([cat lflows.txt], [0], [dnl - table=9 (lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 00:00:00:00:00:00; next;) + table=11(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 00:00:00:00:00:00; next;) ]) # Now send arp reply from sw0-p2. hv2 should claim sw0-vir @@ -14647,7 +14703,7 @@ ovn-sbctl dump-flows lr0 | grep lr_in_arp_resolve | grep "reg0 == 10.0.0.10" \ > lflows.txt AT_CHECK([cat lflows.txt], [0], [dnl - table=9 (lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:04; next;) + table=11(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:04; next;) ]) # Delete sw0-p2 logical port @@ -15879,3 +15935,225 @@ as hv4 ovs-appctl fdb/show br-phys OVN_CLEANUP([hv1],[hv2],[hv3],[hv4]) AT_CLEANUP + +AT_SETUP([ovn -- ARP lookup before learning]) +AT_KEYWORDS([virtual ports]) +AT_SKIP_IF([test $HAVE_PYTHON = no]) +ovn_start + +send_garp() { + local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6 + local request=${eth_dst}${eth_src}08060001080006040001${eth_src}${spa}${eth_dst}${tpa} + as hv$hv ovs-appctl netdev-dummy/receive hv${hv}-vif$inport $request +} + +send_arp_reply() { + local hv=$1 
inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6 + local request=${eth_dst}${eth_src}08060001080006040002${eth_src}${spa}${eth_dst}${tpa} + as hv$hv ovs-appctl netdev-dummy/receive hv${hv}-vif$inport $request +} + +net_add n1 + +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 +ovs-vsctl -- add-port br-int hv1-vif1 -- \ + set interface hv1-vif1 external-ids:iface-id=sw0-p1 \ + options:tx_pcap=hv1/vif1-tx.pcap \ + options:rxq_pcap=hv1/vif1-rx.pcap \ + ofport-request=1 +ovs-vsctl -- add-port br-int hv1-vif2 -- \ + set interface hv1-vif2 external-ids:iface-id=sw0-p3 \ + options:tx_pcap=hv1/vif2-tx.pcap \ + options:rxq_pcap=hv1/vif2-rx.pcap \ + ofport-request=2 + +sim_add hv2 +as hv2 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.2 +ovs-vsctl -- add-port br-int hv2-vif1 -- \ + set interface hv2-vif1 external-ids:iface-id=sw1-p1 \ + options:tx_pcap=hv2/vif1-tx.pcap \ + options:rxq_pcap=hv2/vif1-rx.pcap \ + ofport-request=1 + +ovn-nbctl ls-add sw0 + +ovn-nbctl lsp-add sw0 sw0-p1 +ovn-nbctl lsp-set-addresses sw0-p1 "50:54:00:00:00:03" + +# Create the second logical switch with one port +ovn-nbctl ls-add sw1 +ovn-nbctl lsp-add sw1 sw1-p1 +ovn-nbctl lsp-set-addresses sw1-p1 "40:54:00:00:00:03 20.0.0.3" +ovn-nbctl lsp-set-port-security sw1-p1 "40:54:00:00:00:03 20.0.0.3" + +# Create a logical router and attach both logical switches +ovn-nbctl lr-add lr0 +ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 +ovn-nbctl lsp-add sw0 sw0-lr0 +ovn-nbctl lsp-set-type sw0-lr0 router +ovn-nbctl lsp-set-addresses sw0-lr0 00:00:00:00:ff:01 +ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0 + +ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 +ovn-nbctl lsp-add sw1 sw1-lr0 +ovn-nbctl lsp-set-type sw1-lr0 router +ovn-nbctl lsp-set-addresses sw1-lr0 00:00:00:00:ff:02 +ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1 + +OVN_POPULATE_ARP +ovn-nbctl --wait=hv sync + +as hv1 ovs-appctl -t ovn-controller vlog/set dbg + 
+ip_to_hex() { + printf "%02x%02x%02x%02x" "$@" +} + +# From sw0-p1 send GARP for 10.0.0.30. +# ovn-controller should learn the +# mac_binding entry +# port - lr0-sw0 +# ip - 10.0.0.30 +# mac - 50:54:00:00:00:03 + +AT_CHECK([test 0 = `ovn-sbctl list mac_binding | wc -l`]) +eth_src=505400000003 +eth_dst=ffffffffffff +spa=$(ip_to_hex 10 0 0 30) +tpa=$(ip_to_hex 10 0 0 30) +send_garp 1 1 $eth_src $eth_dst $spa $tpa + +OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns _uuid list mac_binding | wc -l`]) + +AT_CHECK([ovn-sbctl --format=csv --bare --columns logical_port,ip,mac \ +list mac_binding], [0], [lr0-sw0 +10.0.0.30 +50:54:00:00:00:03 +]) + +AT_CHECK([test 1 = `cat hv1/ovn-controller.log | grep NXT_PACKET_IN2 | wc -l`]) +AT_CHECK([test 1 = `as hv1 ovs-ofctl dump-flows br-int table=10 | grep arp | \ +grep controller | grep -v n_packets=0 | wc -l`]) + +# Wait for an entry in table=67 +OVS_WAIT_UNTIL( + [test 1 = `as hv1 ovs-ofctl dump-flows br-int table=67 | grep n_packets=0 \ +| wc -l`] +) + +# Send garp again. This time the packet should not be sent to ovn-controller. +send_garp 1 1 $eth_src $eth_dst $spa $tpa +# Wait for an entry in table=67 +OVS_WAIT_UNTIL([test 1 = `as hv1 ovs-ofctl dump-flows br-int table=67 | grep n_packets=1 | wc -l`]) + +# The packet should not be sent to ovn-controller. The packet +# count should be 1 only. +AT_CHECK([test 1 = `cat hv1/ovn-controller.log | grep NXT_PACKET_IN2 | wc -l`]) +AT_CHECK([test 1 = `as hv1 ovs-ofctl dump-flows br-int table=10 | grep arp | \ +grep controller | grep -v n_packets=0 | wc -l`]) + +# Now send garp packet with different mac. +eth_src=505400000013 +eth_dst=ffffffffffff +spa=$(ip_to_hex 10 0 0 30) +tpa=$(ip_to_hex 10 0 0 30) +send_garp 1 1 $eth_src $eth_dst $spa $tpa + +# The garp packet should be sent to ovn-controller and the mac_binding entry +# should be updated. 
+OVS_WAIT_UNTIL([test 2 = `cat hv1/ovn-controller.log | grep NXT_PACKET_IN2 | wc -l`]) + +AT_CHECK([test 1 = `ovn-sbctl --bare --columns _uuid list mac_binding | wc -l`]) + +AT_CHECK([ovn-sbctl --format=csv --bare --columns logical_port,ip,mac \ +list mac_binding], [0], [lr0-sw0 +10.0.0.30 +50:54:00:00:00:13 +]) + +# Send ARP request to lrp - lr0-sw1 (20.0.0.1) using src mac 50:54:00:00:00:33 +# and src ip - 10.0.0.50 from sw0-p1. +# ovn-controller should add the mac_binding entry +# logical_port - lr0-sw0 +# IP - 10.0.0.50 +# MAC - 50:54:00:00:00:33 +eth_src=505400000033 +eth_dst=ffffffffffff +spa=$(ip_to_hex 10 0 0 50) +tpa=$(ip_to_hex 20 0 0 1) + +send_garp 1 1 $eth_src $eth_dst $spa $tpa + +# The ARP request should be sent to ovn-controller and the mac_binding entry +# should be added. +OVS_WAIT_UNTIL([test 3 = `cat hv1/ovn-controller.log | grep NXT_PACKET_IN2 | wc -l`]) + +OVS_WAIT_UNTIL( + [test 1 = `as hv1 ovs-ofctl dump-flows br-int table=67 | grep dl_src=50:54:00:00:00:33 \ +| wc -l`] +) + +AT_CHECK([ovn-sbctl --format=csv --bare --columns logical_port,ip,mac \ +find mac_binding ip=10.0.0.50], [0], [lr0-sw0 +10.0.0.50 +50:54:00:00:00:33 +]) + +# Send the same packet again. +send_garp 1 1 $eth_src $eth_dst $spa $tpa + +OVS_WAIT_UNTIL( + [test 1 = `as hv1 ovs-ofctl dump-flows br-int table=67 | grep dl_src=50:54:00:00:00:33 \ +| grep n_packets=1 | wc -l`] +) + +AT_CHECK([test 3 = `cat hv1/ovn-controller.log | grep NXT_PACKET_IN2 | wc -l`]) + +# Now send ARP reply packet with IP - 10.0.0.40 and mac 505400000023 +eth_src=505400000023 +eth_dst=ffffffffffff +spa=$(ip_to_hex 10 0 0 40) +tpa=$(ip_to_hex 10 0 0 50) +send_arp_reply 1 1 $eth_src $eth_dst $spa $tpa + +# ovn-controller should add the +# mac_binding entry +# port - lr0-sw0 +# ip - 10.0.0.40 +# mac - 50:54:00:00:00:23 + +# The ARP reply should be sent to ovn-controller and the mac_binding entry +# should be added. 
+OVS_WAIT_UNTIL([test 4 = `cat hv1/ovn-controller.log | grep NXT_PACKET_IN2 | wc -l`]) + +# Wait for an entry in table=67 for the learnt mac_binding entry. + +OVS_WAIT_UNTIL( + [test 1 = `as hv1 ovs-ofctl dump-flows br-int table=67 | grep dl_src=50:54:00:00:00:23 \ +| wc -l`] +) + +# Send the same garp reply. This time it should not be sent to ovn-controller. +send_arp_reply 1 1 $eth_src $eth_dst $spa $tpa +OVS_WAIT_UNTIL( + [test 1 = `as hv1 ovs-ofctl dump-flows br-int table=67 | grep dl_src=50:54:00:00:00:23 \ +| grep n_packets=1 | wc -l`] +) + +AT_CHECK([test 4 = `cat hv1/ovn-controller.log | grep NXT_PACKET_IN2 | wc -l`]) + +send_arp_reply 1 1 $eth_src $eth_dst $spa $tpa +OVS_WAIT_UNTIL( + [test 1 = `as hv1 ovs-ofctl dump-flows br-int table=67 | grep dl_src=50:54:00:00:00:23 \ +| grep n_packets=2 | wc -l`] +) + +AT_CHECK([test 4 = `cat hv1/ovn-controller.log | grep NXT_PACKET_IN2 | wc -l`]) + +OVN_CLEANUP([hv1], [hv2]) +AT_CLEANUP diff --git a/tests/test-ovn.c b/tests/test-ovn.c index 8462c21b6..c16f9c5cc 100644 --- a/tests/test-ovn.c +++ b/tests/test-ovn.c @@ -1297,6 +1297,7 @@ test_parse_actions(struct ovs_cmdl_context *ctx OVS_UNUSED) .egress_ptable = 40, .output_ptable = 64, .mac_bind_ptable = 65, + .mac_lookup_ptable = 67, }; struct ofpbuf ofpacts; ofpbuf_init(&ofpacts, 0); diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c index 0583610b9..c95acb897 100644 --- a/utilities/ovn-trace.c +++ b/utilities/ovn-trace.c @@ -556,6 +556,22 @@ ovntrace_mac_binding_find(const struct ovntrace_datapath *dp, return NULL; } +static const struct ovntrace_mac_binding * +ovntrace_mac_binding_find_mac_ip(const struct ovntrace_datapath *dp, + uint16_t port_key, const struct in6_addr *ip, + struct eth_addr mac) +{ + const struct ovntrace_mac_binding *bind; + HMAP_FOR_EACH_WITH_HASH (bind, node, hash_mac_binding(port_key, ip), + &dp->mac_bindings) { + if (bind->port_key == port_key && ipv6_addr_equals(ip, &bind->ip) + && eth_addr_equals(bind->mac, mac)) { + return bind; 
+ } + } + return NULL; +} + /* If 's' ends with a UUID, returns a copy of it with the UUID truncated to * just the first 6 characters; otherwise, returns a copy of 's'. */ static char * @@ -1704,6 +1720,55 @@ execute_get_mac_bind(const struct ovnact_get_mac_bind *bind, ETH_ADDR_ARGS(uflow->dl_dst)); } +/* Traces a lookup_arp/lookup_nd action: reads the logical port, IP address + * and MAC operands of 'bind' from 'uflow', searches 'dp' for a MAC binding + * matching all three, and writes 1 to the destination subfield of 'bind' if + * one is found, 0 otherwise. */ +static void +execute_lookup_mac(const struct ovnact_lookup_mac_bind *bind, + const struct ovntrace_datapath *dp, + struct flow *uflow, + struct ovs_list *super) +{ + /* Get logical port number. */ + struct mf_subfield port_sf = expr_resolve_field(&bind->port); + ovs_assert(port_sf.n_bits == 32); + uint32_t port_key = mf_get_subfield(&port_sf, uflow); + + /* Get IP address. */ + struct mf_subfield ip_sf = expr_resolve_field(&bind->ip); + ovs_assert(ip_sf.n_bits == 32 || ip_sf.n_bits == 128); + union mf_subvalue ip_sv; + mf_read_subfield(&ip_sf, uflow, &ip_sv); + struct in6_addr ip = (ip_sf.n_bits == 32 + ? in6_addr_mapped_ipv4(ip_sv.ipv4) + : ip_sv.ipv6); + + /* Get MAC. */ + struct mf_subfield mac_sf = expr_resolve_field(&bind->mac); + ovs_assert(mac_sf.n_bits == 48); + union mf_subvalue mac_sv; + mf_read_subfield(&mac_sf, uflow, &mac_sv); + + const struct ovntrace_mac_binding *binding + = ovntrace_mac_binding_find_mac_ip(dp, port_key, &ip, mac_sv.mac); + + struct mf_subfield dst = expr_resolve_field(&bind->dst); + uint8_t val = 0; + + if (binding) { + val = 1; + ovntrace_node_append(super, OVNTRACE_NODE_ACTION, + "/* MAC binding to "ETH_ADDR_FMT" found. */", + ETH_ADDR_ARGS(binding->mac)); + } else { + ovntrace_node_append(super, OVNTRACE_NODE_ACTION, + "/* lookup failed - No MAC binding. 
*/"); + } + union mf_subvalue sv = { .u8_val = val }; + mf_write_subfield_flow(&dst, &sv, uflow); +} + static void execute_put_opts(const struct ovnact_put_opts *po, const char *name, struct flow *uflow, @@ -2072,6 +2137,14 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len, /* Nothing to do for tracing. 
*/ break; + case OVNACT_LOOKUP_ARP: + execute_lookup_mac(ovnact_get_LOOKUP_ARP(a), dp, uflow, super); + break; + + case OVNACT_LOOKUP_ND: + execute_lookup_mac(ovnact_get_LOOKUP_ND(a), dp, uflow, super); + break; + case OVNACT_PUT_DHCPV4_OPTS: execute_put_dhcp_opts(ovnact_get_PUT_DHCPV4_OPTS(a), "put_dhcp_opts", uflow, super); -- 2.21.0 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev