From: Erlon R. Cruz <er...@canonical.com> At the current state, OVN can not handle UDP fragmented traffic for ACLs in the userspace. Just like in the case of LB (commit 20a96b9), the kernel DP will try to reassemble the fragments during CT lookup, however userspace won't reassemble them. One way to solve that is to use the ct_udp field to match on ct.new connections. With the current state, OVN will get the CMS rules and write them verbatim to SB, which will generate similar rules in OVS. So, this workaround replaces L4 protocol matches with connection tracking equivalents. For example:
"outport == "server" && udp && udp.dst == 4242" becomes: "outport == "server" && ct.new && ct_udp.dst == 4242" We made this behavior to be optional and disabled by default. In order to enable it, the user needs to set an NB_Global flag: ovn-nbctl set NB_Global . options:udp_ct_translation=true Signed-off-by: Erlon R. Cruz <er...@canonical.com> --- NEWS | 4 + northd/en-global-config.c | 5 ++ northd/northd.c | 156 +++++++++++++++++++++++++++++++-- ovn-nb.xml | 11 +++ tests/system-ovn.at | 180 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 351 insertions(+), 5 deletions(-) diff --git a/NEWS b/NEWS index 0cce1790d..54d94a0a5 100644 --- a/NEWS +++ b/NEWS @@ -28,6 +28,10 @@ Post v25.03.0 with stateless ACL to work with load balancer. - Added new ovn-nbctl command 'pg-get-ports' to get the ports assigned to the port group. + - Added new NB_Global option 'udp_ct_translation' to control whether + stateful ACL rules that match on UDP port fields are rewritten to use + connection tracking fields to properly handle IP fragments. Default is + false. - The ovn-controller option ovn-ofctrl-wait-before-clear is no longer supported. It will be ignored if used. ovn-controller will automatically care about proper delay before clearing lflow. diff --git a/northd/en-global-config.c b/northd/en-global-config.c index e7d6a7e66..b88274f61 100644 --- a/northd/en-global-config.c +++ b/northd/en-global-config.c @@ -639,6 +639,11 @@ check_nb_options_out_of_sync( return true; } + if (config_out_of_sync(&nb->options, &config_data->nb_options, + "udp_ct_translation", false)) { + return true; + } + return false; } diff --git a/northd/northd.c b/northd/northd.c index 764575f21..196f7a773 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -69,6 +69,7 @@ #include "uuid.h" #include "ovs-thread.h" #include "openvswitch/vlog.h" +#include <ctype.h> VLOG_DEFINE_THIS_MODULE(northd); @@ -86,6 +87,12 @@ static bool use_common_zone = false; * Otherwise, it will avoid using it. The default is true. */ static bool use_ct_inv_match = true; +/* If this option is 'true' northd will rewrite stateful ACL rules that match + * on UDP port fields to use connection tracking fields to properly handle IP + * fragments. By default this option is set to 'false'. + */ +static bool udp_ct_translation = false; + /* If this option is 'true' northd will implicitly add a lowest-priority * drop rule in the ACL stage of logical switches that have at least one * ACL. @@ -7335,6 +7342,113 @@ build_acl_sample_default_flows(const struct ovn_datapath *od, "next;", lflow_ref); } +/* We need toetect if an ACL match string contains L4 matches. */ +static bool +acl_has_l4_match(const char *match_str) +{ + return (strstr(match_str, "udp.")); +} + +/* Extracts port number from a match string. + * Examples of match strings and extracted values: + * - "udp.dst == 4242" -> extracts "4242" for field "udp.dst" + * - "outport == \"server\" && udp && udp.dst == 4242" -> extracts "4242" for + * field "udp.dst" + */ +static char * +extract_port_value(const char *match, const char *field) +{ + static char port_buf[16]; + char *field_pos = strstr(match, field); + if (!field_pos) { + return NULL; + } + + /* Move past the field name */ + field_pos += strlen(field); + + /* Skip spaces and == */ + while (*field_pos && (*field_pos == ' ' || *field_pos == '=')) { + field_pos++; + } + + /* Extract the port number */ + int i = 0; + while (*field_pos && isdigit(*field_pos) && i < 15) { + port_buf[i++] = *field_pos++; + } + port_buf[i] = '\0'; + + return i > 0 ? port_buf : NULL; +} + +/* This function implements a workaround for stateful ACLs with L4 matches that + * need to handle IP fragments properly. The issue is that UDP L4 headers are + * only present in the first fragment of a fragmented packet. Subsequent + * fragments don't have L4 headers, so they won't match ACL rules that look for + * L4 fields. + * + * The workaround replaces L4 protocol matches with connection tracking + * equivalents. For example: + * "outport == "server" && udp && udp.dst == 4242" + * becomes: + * "outport == "server" && ct.new && ct_udp.dst == 4242" + */ +static char * +rewrite_match_for_fragments(const char *match_str) +{ + struct ds new_match = DS_EMPTY_INITIALIZER; + char *str_copy = xstrdup(match_str); + char *token; + char *saveptr; + + /* First pass: Extract non-L4 parts and detect L4 protocol */ + token = strtok_r(str_copy, "&&", &saveptr); + while (token) { + /* Skip leading/trailing spaces */ + while (*token == ' ') { + token++; + } + + /* Check if this token contains L4 protocol info */ + if (!(strstr(token, "udp"))) { + /* Keep non-L4 parts of the match */ + if (new_match.length > 0) { + ds_put_cstr(&new_match, " && "); + } + ds_put_cstr(&new_match, token); + } + token = strtok_r(NULL, "&&", &saveptr); + } + + /* Add ct.new condition */ + if (new_match.length > 0) { + ds_put_cstr(&new_match, " && "); + } + ds_put_cstr(&new_match, "ct.new"); + + /* Second pass: Add connection tracking L4 matches */ + free(str_copy); + str_copy = xstrdup(match_str); + + /* Use the extract_port_value helper function to get port numbers */ + + /* Add specific L4 field matches with port numbers */ + char *port; + if ((port = extract_port_value(match_str, "udp.dst"))) { + ds_put_format(&new_match, " && ct_udp.dst == %s", port); + } + if ((port = extract_port_value(match_str, "udp.src"))) { + ds_put_format(&new_match, " && ct_udp.src == %s", port); + } + + /* Clean up and return the result */ + free(str_copy); + char *result = xstrdup(ds_cstr(&new_match)); + ds_destroy(&new_match); + return result; +} + static void consider_acl(struct lflow_table *lflows, const struct ovn_datapath *od, const struct nbrec_acl *acl, bool has_stateful, @@ -7385,6 +7499,25 @@ consider_acl(struct lflow_table *lflows, const struct ovn_datapath *od, match_tier_len = match->length; } + /* Check if this ACL has L4 matches that need fragment handling */ + bool has_l4_match = acl_has_l4_match(acl->match); + char *modified_match = NULL; + + /* For stateful ACLs with L4 matches, rewrite the match string to handle + * fragments, but only if udp_ct_translation is enabled */ + if (has_stateful && has_l4_match && udp_ct_translation) { + modified_match = rewrite_match_for_fragments(acl->match); + + /* Log the rewrite for debugging and visibility */ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_DBG_RL(&rl, "Rewriting ACL match for L4 fragment handling: " + "original='%s' modified='%s'", acl->match, modified_match); + } + + /* Use the original or modified match string based on whether L4 matches + * were detected */ + const char *match_to_use = modified_match ? modified_match : acl->match; + if (!has_stateful || !strcmp(acl->action, "pass") || !strcmp(acl->action, "allow-stateless")) { @@ -7394,10 +7527,14 @@ consider_acl(struct lflow_table *lflows, const struct ovn_datapath *od, obs_stage); ds_put_cstr(actions, "next;"); - ds_put_format(match, "(%s)", acl->match); + ds_put_format(match, "(%s)", match_to_use); ovn_lflow_add_with_hint(lflows, od, stage, priority, ds_cstr(match), ds_cstr(actions), &acl->header_, lflow_ref); + + if (modified_match) { + free(modified_match); + } return; } @@ -7423,7 +7560,7 @@ consider_acl(struct lflow_table *lflows, const struct ovn_datapath *od, */ ds_truncate(match, match_tier_len); ds_put_format(match, REGBIT_ACL_HINT_ALLOW_NEW " == 1 && (%s)", - acl->match); + match_to_use); ds_truncate(actions, log_verdict_len); @@ -7468,7 +7605,7 @@ consider_acl(struct lflow_table *lflows, const struct ovn_datapath *od, ds_truncate(match, match_tier_len); ds_truncate(actions, log_verdict_len); ds_put_format(match, REGBIT_ACL_HINT_ALLOW " == 1 && (%s)", - acl->match); + match_to_use); if (acl->label || acl->sample_est) { ds_put_cstr(actions, REGBIT_CONNTRACK_COMMIT" = 1; "); } @@ -7490,7 +7627,7 @@ consider_acl(struct lflow_table *lflows, const struct ovn_datapath *od, * connection, then we can simply reject/drop it. */ ds_truncate(match, match_tier_len); ds_put_cstr(match, REGBIT_ACL_HINT_DROP " == 1"); - ds_put_format(match, " && (%s)", acl->match); + ds_put_format(match, " && (%s)", match_to_use); ds_truncate(actions, log_verdict_len); @@ -7514,7 +7651,7 @@ consider_acl(struct lflow_table *lflows, const struct ovn_datapath *od, */ ds_truncate(match, match_tier_len); ds_put_cstr(match, REGBIT_ACL_HINT_BLOCK " == 1"); - ds_put_format(match, " && (%s)", acl->match); + ds_put_format(match, " && (%s)", match_to_use); ds_truncate(actions, log_verdict_len); @@ -7529,6 +7666,11 @@ consider_acl(struct lflow_table *lflows, const struct ovn_datapath *od, ds_cstr(match), ds_cstr(actions), &acl->header_, lflow_ref); } + + /* Free the modified match string if it was created */ + if (modified_match) { + free(modified_match); + } } static void @@ -19538,6 +19680,10 @@ ovnnb_db_run(struct northd_input *input_data, use_common_zone = smap_get_bool(input_data->nb_options, "use_common_zone", false); + udp_ct_translation = smap_get_bool(input_data->nb_options, + "udp_ct_translation", + false); + vxlan_mode = input_data->vxlan_mode; build_datapaths(ovnsb_txn, diff --git a/ovn-nb.xml b/ovn-nb.xml index 622831da4..652774d78 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -429,6 +429,17 @@ </p> </column> + <column name="options" key="udp_ct_translation"> + <p> + If set to <code>true</code>, <code>ovn-northd</code> will rewrite + stateful ACL rules that match on UDP port fields to use connection + tracking fields (ct_udp.dst, ct_udp.src) to properly handle IP + fragments. This is a workaround to ensure that fragmented UDP packets + match ACLs correctly. By default this option is set to + <code>false</code>. + </p> + </column> + <column name="options" key="enable_chassis_nb_cfg_update"> <p> If set to <code>false</code>, ovn-controllers will no longer update diff --git a/tests/system-ovn.at b/tests/system-ovn.at index e0407383a..f58083d3e 100644 --- a/tests/system-ovn.at +++ b/tests/system-ovn.at @@ -17970,3 +17970,183 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d /connection dropped.*/d"]) AT_CLEANUP ]) + + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([ACL correctly handles fragmented traffic]) +AT_KEYWORDS([ovnacl]) + +CHECK_CONNTRACK() +CHECK_CONNTRACK_NAT() + +ovn_start +OVS_TRAFFIC_VSWITCHD_START() +ADD_BR([br-int]) +ADD_BR([br-ext]) + +# Logical network: +# 2 logical switches "public" (192.168.1.0/24) and "internal" (172.16.1.0/24) +# connected to a router lr. +# internal has a server. +# client is connected through localnet. + +check ovs-ofctl add-flow br-ext action=normal +# Set external-ids in br-int needed for ovn-controller +check ovs-vsctl \ + -- set Open_vSwitch . external-ids:system-id=hv1 \ + -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ + -- set bridge br-int fail-mode=secure other-config:disable-in-band=true \ + -- set Open_vSwitch . external-ids:ovn-bridge-mappings=phynet:br-ext + + +# Start ovn-controller +start_daemon ovn-controller + +# Set the minimal fragment size for userspace DP. +# Note that this call will fail for system DP as this setting is not supported there. +ovs-appctl dpctl/ipf-set-min-frag v4 500 + +check ovn-nbctl lr-add lr +check ovn-nbctl ls-add internal +check ovn-nbctl ls-add public + +check ovn-nbctl lrp-add lr lr-pub 00:00:01:01:02:03 192.168.1.1/24 +check ovn-nbctl lsp-add public pub-lr -- set Logical_Switch_Port pub-lr \ + type=router options:router-port=lr-pub addresses=\"00:00:01:01:02:03\" + +check ovn-nbctl lrp-add lr lr-internal 00:00:01:01:02:04 172.16.1.1/24 +check ovn-nbctl lsp-add internal internal-lr -- set Logical_Switch_Port internal-lr \ + type=router options:router-port=lr-internal addresses=\"00:00:01:01:02:04\" + +check ovn-nbctl lsp-add public ln_port \ + -- lsp-set-addresses ln_port unknown \ + -- lsp-set-type ln_port localnet \ + -- lsp-set-options ln_port network_name=phynet + +ADD_NAMESPACES(client) +ADD_VETH(client, client, br-ext, "192.168.1.2/24", "f0:00:00:01:02:03", \ + "192.168.1.1") +NS_EXEC([client], [ip l set dev client mtu 900]) +NS_EXEC([client], [ip a]) +NS_CHECK_EXEC([client], [ip route add 172.16.1.0/24 via 192.168.1.1 dev client]) + +ADD_NAMESPACES(server) +ADD_VETH(server, server, br-int, "172.16.1.2/24", "f0:00:0f:01:02:03", \ + "172.16.1.1") +check ovn-nbctl lsp-add internal server \ +-- lsp-set-addresses server "f0:00:0f:01:02:03 172.16.1.2" +NS_EXEC([server], [ip a]) +NS_CHECK_EXEC([server], [ip route add 192.168.1.0/24 via 172.16.1.1 dev server]) + +check ovn-nbctl set logical_router lr options:chassis=hv1 + +# Adds debug logging for dpif +check ovs-appctl vlog/set dpif:dbg + +dump_ovs_info() { + echo ====== $1 ====== + ovs-ofctl show br-int + echo && echo + echo =======ovn-nbctl show======== + ovn-nbctl show + echo ============br-int=========== + ovs-ofctl dump-flows -O OpenFlow15 br-int + echo && echo + echo ============br-ext=========== + ovs-ofctl dump-flows br-ext + echo && echo + echo ========lflow-list=========== + ovn-sbctl lflow-list + echo && echo + ovn-sbctl list Logical_Flow + echo && echo + echo =========ofproto-trace========== + ovs-appctl ofproto/trace br-int "udp,vlan_tci=0x0000,dl_src=f0:00:00:01:02:03,dl_dst=00:00:01:01:02:03,nw_src=192.168.1.2,nw_dst=172.16.1.2,nw_tos=0,nw_ecn=0,nw_ttl=64,nw_frag=first,udp_src=46801,udp_dst=4242" + echo ============================= +} + +AT_DATA([udp_client.py], [dnl +import socket + +sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +sock.sendto(b"x" * 1000, ("172.16.1.2", 4242)) +]) + +test_fragmented_udp_traffic() { + check ovn-nbctl --wait=hv sync + check ovs-appctl dpctl/flush-conntrack + + NETNS_DAEMONIZE([server], [nc -l -u 172.16.1.2 4242 > /dev/null], [server.pid]) + + # Collect ICMP packets on client side + NETNS_START_TCPDUMP([client], [-U -i client -vnne udp], [tcpdump-client]) + + # Collect UDP packets on server side + NETNS_START_TCPDUMP([server], [-U -i server -vnne 'udp and ip[[6:2]] > 0 and not ip[[6]] = 64'], [tcpdump-server]) + + NS_CHECK_EXEC([client], [$PYTHON3 ./udp_client.py]) + OVS_WAIT_UNTIL([test "$(cat tcpdump-server.tcpdump | wc -l)" = "4"]) + + echo ============conntrack-table============ + ovs-appctl dpctl/dump-conntrack + echo ======================================= + + kill $(cat tcpdump-client.pid) $(cat tcpdump-server.pid) $(cat server.pid) +} + +AS_BOX([Router without ACL]) +test_fragmented_udp_traffic + +AS_BOX([Router with ACL: UDP]) + +# Add UDP ACL +check ovn-nbctl --wait=hv acl-add internal to-lport 1000 "outport == \"server\" && udp && udp.dst == 4242" allow-related +check ovn-nbctl --wait=hv --log --severity=info acl-add internal to-lport 100 "outport == \"server\"" drop + +# Test with udp_ct_translation disabled (default) +AS_BOX([UDP CT Translation disabled (default)]) +dump_ovs_info "With UDP CT Translation disabled" + +# Capture logical flows to verify no CT field rewriting +echo "Checking for CT field rewriting in logical flows (should NOT find any):" +ovn-sbctl lflow-list | grep -i "ct_udp" || echo "No CT UDP rewriting found (expected)" + +# Test with udp_ct_translation enabled +AS_BOX([UDP CT Translation enabled]) +check ovn-nbctl set NB_Global . options:udp_ct_translation=true + +# Wait for northd to process the option change +sleep 2 +check ovn-nbctl --wait=hv sync + +dump_ovs_info "With UDP CT Translation enabled" + +# Capture logical flows to verify CT field rewriting +echo "Checking for CT field rewriting in logical flows (should find some):" +ovn-sbctl lflow-list | grep -i "ct_udp" && echo "CT UDP rewriting found (expected)" || echo "No CT UDP rewriting found (unexpected)" + +# Test UDP fragmented traffic with CT translation enabled +test_fragmented_udp_traffic + +# Reset the option back to default +check ovn-nbctl set NB_Global . options:udp_ct_translation=false + +as ovn-sb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as ovn-nb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as northd +OVS_APP_EXIT_AND_WAIT([ovn-northd]) + +as +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d +/connection dropped.*/d +/WARN|netdev@ovs-netdev: execute.*/d +/dpif|WARN|system@ovs-system: execute.*/d +"]) +AT_CLEANUP +]) -- 2.43.0 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev