On Mon, Oct 17, 2022 at 6:50 PM Paolo Valerio <pvale...@redhat.com> wrote:
> Hello Ales, > Hi Paolo, thank you for the feedback. > > overall the approach is ok, the only concern is that, unless I'm missing > something, in case of many connections, the exact match deletion could > potentially take a while, whereas in the previous case the cost > was basically a lookup (constant time) and of course the remaining > deletion operation. > > It would be nice to avoid the extra cost when the whole 5-tuple is > specified. WDYT? > Ack, it will be in v1 of formal patch. > > Ales Musil <amu...@redhat.com> writes: > > > Curreently in order to flush conntrack you would need to > > specify full 5-tuple. Add support for partial match > > it still has some limitations however it is capable of flushing > > all that match specified field e.g. source ip address. > > > > Reported-at: https://bugzilla.redhat.com/2120546 > > Signed-off-by: Ales Musil <amu...@redhat.com> > > --- > > NEWS | 2 + > > lib/ct-dpif.c | 178 +++++++++++++++++++++++++++++++--------- > > lib/dpctl.man | 3 +- > > tests/system-traffic.at | 84 ++++++++++++++++++- > > 4 files changed, 226 insertions(+), 41 deletions(-) > > > > diff --git a/NEWS b/NEWS > > index ff77ee404..81909812e 100644 > > --- a/NEWS > > +++ b/NEWS > > @@ -23,6 +23,8 @@ Post-v3.0.0 > > bug and CVE fixes addressed since its release. > > If a user wishes to benefit from these fixes it is recommended > to use > > DPDK 21.11.2. > > + - ovs-dpctl and related ovs-appctl commands: > > + * "flush-conntrack" is capable of handling partial 5-tuple. > > > > > > v3.0.0 - 15 Aug 2022 > > diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c > > index cfc2315e3..57995f5e5 100644 > > --- a/lib/ct-dpif.c > > +++ b/lib/ct-dpif.c > > @@ -18,6 +18,8 @@ > > #include "dpif-provider.h" > > > > #include <errno.h> > > +#include <sys/types.h> > > +#include <netinet/in.h> > > > > #include "ct-dpif.h" > > #include "openvswitch/ofp-parse.h" > > @@ -109,7 +111,113 @@ ct_dpif_dump_done(struct ct_dpif_dump_state *dump) > > ? dpif->dpif_class->ct_dump_done(dpif, dump) > > : EOPNOTSUPP); > > } > > - > > was this intentional? > Just checking > > > + > > +static inline bool > > +ct_dpif_inet_addr_cmp_partial(const union ct_dpif_inet_addr *partial, > > + const union ct_dpif_inet_addr *addr) > > +{ > > + if (!ipv6_is_zero(&partial->in6) && > > + !ipv6_addr_equals(&partial->in6, &addr->in6)) { > > + return false; > > + } > > + return true; > > +} > > + > > +/* Compares the non-zero members if they match. This is usefull for > clearing > > + * up all conntracks specified by a partial tuple. */ > > +static inline bool > > +ct_dpif_tuple_cmp_partial(const struct ct_dpif_tuple *partial, > > + const struct ct_dpif_tuple *tuple) > > +{ > > + /* There is no point in continuing if both do not use the same eth > type. */ > > + if (partial->l3_type != tuple->l3_type) { > > + return false; > > + } > > + > > + if (partial->ip_proto && partial->ip_proto != tuple->ip_proto) { > > + return false; > > + } > > + > > + if (!ct_dpif_inet_addr_cmp_partial(&partial->src, &tuple->src)) { > > + return false; > > + } > > + > > + if (!ct_dpif_inet_addr_cmp_partial(&partial->dst, &tuple->dst)) { > > + return false; > > + } > > + > > + if (partial->ip_proto == IPPROTO_TCP || partial->ip_proto == > IPPROTO_UDP) { > > + > > + if (partial->src_port && partial->src_port != tuple->src_port) { > > + return false; > > + } > > + > > + if (partial->dst_port && partial->dst_port != tuple->dst_port) { > > + return false; > > + } > > + } else if (partial->ip_proto == IPPROTO_ICMP || > > + partial->ip_proto == IPPROTO_ICMPV6) { > > + > > + if (partial->icmp_id != tuple->icmp_id) { > > + return false; > > + } > > + > > + if (partial->icmp_type != tuple->icmp_type) { > > + return false; > > + } > > + > > + if (partial->icmp_code != tuple->icmp_code) { > > + return false; > > + } > > + } > > + > > + return true; > > +} > > + > > +static int > > +ct_dpif_flush_tuple(struct dpif *dpif, const uint16_t *zone, > > + const struct ct_dpif_tuple *tuple) { > > + struct ct_dpif_dump_state *dump; > > + struct ct_dpif_entry cte; > > + int error; > > + int tot_bkts; > > + > > + if (!dpif->dpif_class->ct_flush) { > > + return EOPNOTSUPP; > > + } > > + > > + if (VLOG_IS_DBG_ENABLED()) { > > + struct ds ds = DS_EMPTY_INITIALIZER; > > + ct_dpif_format_tuple(&ds, tuple); > > + VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif), > ds_cstr(&ds), > > + zone ? *zone : 0); > > + ds_destroy(&ds); > > + } > > + > > + error = ct_dpif_dump_start(dpif, &dump, zone, &tot_bkts); > > + if (error) { > > + return error; > > + } > > + > > + while (!(error = ct_dpif_dump_next(dump, &cte))) { > > + if (zone && *zone != cte.zone) { > > + continue; > > + } > > + > > + if (ct_dpif_tuple_cmp_partial(tuple, &cte.tuple_orig)) { > > + error = dpif->dpif_class->ct_flush(dpif, &cte.zone, > > + &cte.tuple_orig); > > + if (error) { > > + break; > > + } > > + } > > + } > > + if (error == EOF) { > > + error = 0; > > + } > > + ct_dpif_dump_done(dump); > > + return error; > > +} > > /* Flush the entries in the connection tracker used by 'dpif'. The > > * arguments have the following behavior: > > * > > @@ -123,11 +231,7 @@ ct_dpif_flush(struct dpif *dpif, const uint16_t > *zone, > > const struct ct_dpif_tuple *tuple) > > { > > if (tuple) { > > - struct ds ds = DS_EMPTY_INITIALIZER; > > - ct_dpif_format_tuple(&ds, tuple); > > - VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif), > ds_cstr(&ds), > > - zone ? *zone : 0); > > - ds_destroy(&ds); > > + return ct_dpif_flush_tuple(dpif, zone, tuple); > > } else if (zone) { > > VLOG_DBG("%s: ct_flush: zone %"PRIu16, dpif_name(dpif), *zone); > > } else { > > @@ -581,6 +685,21 @@ ct_dpif_format_tcp_stat(struct ds * ds, int > tcp_state, int conn_per_state) > > ds_put_format(ds, "=%u", conn_per_state); > > } > > > > +static bool > > +ct_dpif_parse_tuple_ip(union ct_dpif_inet_addr *addr, char *value, > > + uint16_t l3_type) > > +{ > > + if (!ipv6_is_zero(&addr->in6)) { > > + return false; > > + } > > + if (l3_type == AF_INET) { > > + ip_parse(value, &addr->ip); > > + } else { > > + ipv6_parse(value, &addr->in6); > > + } > > + return true; > > +} > > + > > /* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'. > > * Returns true on success. Otherwise, returns false and puts the error > > * message in 'ds'. */ > > @@ -590,6 +709,8 @@ ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, > const char *s, struct ds *ds) > > char *pos, *key, *value, *copy; > > memset(tuple, 0, sizeof *tuple); > > > > + tuple->l3_type = AF_INET; > > + > > pos = copy = xstrdup(s); > > while (ofputil_parse_key_value(&pos, &key, &value)) { > > if (!*value) { > > @@ -597,28 +718,17 @@ ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, > const char *s, struct ds *ds) > > goto error; > > } > > > > - if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst")) { > > - if (tuple->l3_type && tuple->l3_type != AF_INET) { > > - ds_put_cstr(ds, "L3 type set multiple times"); > > - goto error; > > - } else { > > - tuple->l3_type = AF_INET; > > - } > > - if (!ip_parse(value, key[6] == 's' ? &tuple->src.ip : > > - &tuple->dst.ip)) { > > - goto error_with_msg; > > - } > > - } else if (!strcmp(key, "ct_ipv6_src") || > > - !strcmp(key, "ct_ipv6_dst")) { > > - if (tuple->l3_type && tuple->l3_type != AF_INET6) { > > - ds_put_cstr(ds, "L3 type set multiple times"); > > + if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst") || > > + !strcmp(key, "ct_ipv6_src") || !strcmp(key, "ct_ipv6_dst")) > { > > + tuple->l3_type = key[6] == '6' ? AF_INET6 : AF_INET; > > + uint8_t index = key[6] == '6' ? 8 : 6; > > + union ct_dpif_inet_addr *addr = key[index] == 's' > > + ? &tuple->src > > + : &tuple->dst; > > + > > + if (!ct_dpif_parse_tuple_ip(addr, value, tuple->l3_type)) { > > + ds_put_format(ds, "%s is set multiple times", key); > > goto error; > > - } else { > > - tuple->l3_type = AF_INET6; > > - } > > - if (!ipv6_parse(value, key[8] == 's' ? &tuple->src.in6 : > > - &tuple->dst.in6)) { > > - goto error_with_msg; > > } > > } else if (!strcmp(key, "ct_nw_proto")) { > > char *err = str_to_u8(value, key, &tuple->ip_proto); > > @@ -665,17 +775,9 @@ ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, > const char *s, struct ds *ds) > > } > > } > > > > - if (ipv6_is_zero(&tuple->src.in6) || ipv6_is_zero(&tuple->dst.in6) > || > > - !tuple->ip_proto) { > > - /* icmp_type, icmp_code, and icmp_id can be 0. */ > > - if (tuple->ip_proto != IPPROTO_ICMP && > > - tuple->ip_proto != IPPROTO_ICMPV6) { > > - if (!tuple->src_port || !tuple->dst_port) { > > - ds_put_cstr(ds, "at least one of the conntrack 5-tuple > fields " > > - "is missing."); > > - goto error; > > - } > > - } > > + if (!tuple->ip_proto && (tuple->src_port || tuple->dst_port)) { > > + ds_put_cstr(ds, "port is set without protocol"); > > + goto error; > > } > > > > free(copy); > > diff --git a/lib/dpctl.man b/lib/dpctl.man > > index 87ea8087b..b0cabe05d 100644 > > --- a/lib/dpctl.man > > +++ b/lib/dpctl.man > > @@ -312,7 +312,8 @@ If \fBzone\fR=\fIzone\fR is specified, only flushes > the connections in > > If \fIct-tuple\fR is provided, flushes the connection entry specified by > > \fIct-tuple\fR in \fIzone\fR. The zone defaults to 0 if it is not > provided. > > The userspace connection tracker requires flushing with the original > pre-NATed > > -tuple and a warning log will be otherwise generated. > > +tuple and a warning log will be otherwise generated. The tuple can be > partial > > +and will remove all connections that are matching on the specified > fields. > > An example of an IPv4 ICMP \fIct-tuple\fR: > > .IP > > > "ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=1,icmp_type=8,icmp_code=0,icmp_id=10" > > diff --git a/tests/system-traffic.at b/tests/system-traffic.at > > index 731de439c..81b32f348 100644 > > --- a/tests/system-traffic.at > > +++ b/tests/system-traffic.at > > @@ -2221,7 +2221,7 @@ AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep > "orig=.src=10\.1\.1\.1,"], [], > > > udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > > ]) > > > > -AT_CHECK([ovs-appctl dpctl/flush-conntrack > 'ct_nw_src=10.1.1.2,ct_nw_dst=10.1.1.1,ct_nw_proto=17,ct_tp_src=2,ct_tp_dst=1']) > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack > 'ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2']) > > > > AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep > "orig=.src=10\.1\.1\.1,"], [1], [dnl > > ]) > > @@ -2233,7 +2233,7 @@ AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep > "orig=.src=10\.1\.1\.2,"], [0], > > > udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > ]) > > > > -AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 > 'ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2']) > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 > 'ct_nw_src=10.1.1.2,ct_nw_dst=10.1.1.1,ct_nw_proto=17,ct_tp_src=2,ct_tp_dst=1']) > > > > AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], > [dnl > > ]) > > @@ -2255,6 +2255,86 @@ AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 > $ICMP_TUPLE]) > > AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep > "orig=.src=10\.1\.1\.2,"], [1], [dnl > > ]) > > > > +dnl Test UDP from port 1 and 2, partial flush by src port > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 > actions=resubmit(,0)"]) > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 > actions=resubmit(,0)"]) > > + > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], > [dnl > > > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > I see a failure for check-kernel target because the entries are shown in > the reverse order. I guess a "| sort" could be needed. > Ah right, done in v1, thanks. > > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack > 'ct_nw_proto=17,ct_tp_src=1']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], > [dnl > > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack > 'ct_nw_proto=17,ct_tp_src=2']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) > > + > > +dnl Test UDP from port 1 and 2, partial flush by dst port > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 > actions=resubmit(,0)"]) > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 > actions=resubmit(,0)"]) > > + > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], > [dnl > > > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack > 'ct_nw_proto=17,ct_tp_dst=2']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], > [dnl > > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack > 'ct_nw_proto=17,ct_tp_dst=1']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) > > + > > +dnl Test UDP from port 1 and 2, partial flush by src address > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 > actions=resubmit(,0)"]) > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 > actions=resubmit(,0)"]) > > + > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], > [dnl > > > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.1']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], > [dnl > > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.2']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) > > + > > +dnl Test UDP from port 1 and 2, partial flush by dst address > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 > actions=resubmit(,0)"]) > > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 > packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 > actions=resubmit(,0)"]) > > + > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], > [dnl > > > +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) > > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.2']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], > [dnl > > > +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 > > +]) > > + > > +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.1']) > > + > > +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) > > + > > OVS_TRAFFIC_VSWITCHD_STOP > > AT_CLEANUP > > > > -- > > 2.37.2 > > > > _______________________________________________ > > dev mailing list > > d...@openvswitch.org > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > > Thanks, Ales -- Ales Musil Senior Software Engineer - OVN Core Red Hat EMEA <https://www.redhat.com> amu...@redhat.com IM: amusil <https://red.ht/sig> _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev