On 1/12/23 18:17, Ales Musil wrote:
> Currently, the CT can be flushed by dpctl only by specifying
> the whole 5-tuple. This is not very convenient when there are
> only some fields known to the user of CT flush. Add new struct
> ofputil_ct_match which represents the generic filtering that can
> be done for CT flush. The match is done only on fields that are
> non-zero with exception to the icmp fields.
> 
> This allows the filtering just within dpctl, however
> it is a preparation for OpenFlow extension.
> 
> Reported-at: https://bugzilla.redhat.com/2120546
> Signed-off-by: Ales Musil <amu...@redhat.com>
> ---
> v6: Rebase on top of current master.
>     Address comments from Ilya.

Nit: this kind of version history is not really useful.  It should say
what actually changed.  I don't remember every comment I made, other
people have no idea what I was asking for.

> v5: Add ack from Paolo.
> v4: Fix a flush all scenario.
> v3: Rebase on top of master.
>     Address the C99 comment and missing dpif_close call.
> v2: Rebase on top of master.
>     Address comments from Paolo.
> ---
>  NEWS                           |   3 +
>  include/openvswitch/ofp-util.h |  28 +++
>  lib/automake.mk                |   2 +
>  lib/ct-dpif.c                  | 210 ++++++++++-------------
>  lib/ct-dpif.h                  |   4 +-
>  lib/dpctl.c                    |  43 +++--
>  lib/dpctl.man                  |  15 +-
>  lib/ofp-ct-util.c              | 303 +++++++++++++++++++++++++++++++++
>  lib/ofp-ct-util.h              |  40 +++++
>  tests/system-traffic.at        | 102 ++++++++++-
>  10 files changed, 611 insertions(+), 139 deletions(-)
>  create mode 100644 lib/ofp-ct-util.c
>  create mode 100644 lib/ofp-ct-util.h
> 
> diff --git a/NEWS b/NEWS
> index 2f6ededfe..d1f749d69 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -30,6 +30,9 @@ Post-v3.0.0
>     - Userspace datapath:
>       * Add '-secs' argument to appctl 'dpif-netdev/pmd-rxq-show' to show
>         the pmd usage of an Rx queue over a configurable time period.
> +   - ovs-dpctl and 'ovs-appctl dpctl/' commands:
> +     * "flush-conntrack" is capable of handling partial 5-tuple,

is now capable

> +        with additional optional parameter to specify the reply direction.
>  
>  
>  v3.0.0 - 15 Aug 2022
> diff --git a/include/openvswitch/ofp-util.h b/include/openvswitch/ofp-util.h
> index 091a09cad..84937ae26 100644
> --- a/include/openvswitch/ofp-util.h
> +++ b/include/openvswitch/ofp-util.h
> @@ -19,6 +19,9 @@
>  
>  #include <stdbool.h>
>  #include <stdint.h>
> +#include <sys/types.h>
> +#include <netinet/in.h>
> +
>  #include "openvswitch/ofp-protocol.h"
>  
>  struct ofp_header;
> @@ -27,6 +30,31 @@ struct ofp_header;
>  extern "C" {
>  #endif
>  
> +struct ofputil_ct_tuple {
> +    struct in6_addr src;
> +    struct in6_addr dst;
> +
> +    union {
> +        ovs_be16 src_port;
> +        ovs_be16 icmp_id;
> +    };
> +    union {
> +        ovs_be16 dst_port;
> +        struct {
> +            uint8_t icmp_code;
> +            uint8_t icmp_type;
> +        };
> +    };
> +};
> +
> +struct ofputil_ct_match {
> +    uint8_t ip_proto;
> +    uint16_t l3_type;
> +
> +    struct ofputil_ct_tuple tuple_orig;
> +    struct ofputil_ct_tuple tuple_reply;
> +};
> +

These structures are defined here in the public header, but all the
functions that are using these structures are not exported, so
dynamicly linked with libopenvswitch application will not be able
to use these structures anyway.  A single 'encode' function will
be exported in the next patch, but we need other functions as well.
OVS typlically exports igh-level encode/decode/parse/format functions.

We need a new header for all that, including structures.
Smething like /include/openvswitch/ofp-conntrack.h or ofp-ct.h.
ofp-util.h doesn't seem to be a right place.

>  bool ofputil_decode_hello(const struct ofp_header *,
>                            uint32_t *allowed_versions);
>  struct ofpbuf *ofputil_encode_hello(uint32_t version_bitmap);
> diff --git a/lib/automake.mk b/lib/automake.mk
> index 61bdc308f..c3bb60830 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -226,6 +226,8 @@ lib_libopenvswitch_la_SOURCES = \
>       lib/ofp-actions.c \
>       lib/ofp-bundle.c \
>       lib/ofp-connection.c \
> +     lib/ofp-ct-util.c \
> +     lib/ofp-ct-util.h \
>       lib/ofp-ed-props.c \
>       lib/ofp-errors.c \
>       lib/ofp-flow.c \
> diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c
> index 6f17a26b5..ae94c8cbd 100644
> --- a/lib/ct-dpif.c
> +++ b/lib/ct-dpif.c
> @@ -20,6 +20,7 @@
>  #include <errno.h>
>  
>  #include "ct-dpif.h"
> +#include "ofp-ct-util.h"
>  #include "openvswitch/ofp-parse.h"
>  #include "openvswitch/vlog.h"
>  
> @@ -71,6 +72,31 @@ ct_dpif_dump_start(struct dpif *dpif, struct 
> ct_dpif_dump_state **dump,
>      return err;
>  }
>  
> +static void
> +ct_dpif_tuple_from_ofputil_ct_tuple(const struct ofputil_ct_tuple *ofp_tuple,
> +                                    struct ct_dpif_tuple *tuple,
> +                                    uint16_t l3_type, uint8_t ip_proto)
> +{
> +    if (l3_type == AF_INET) {
> +        tuple->src.ip = in6_addr_get_mapped_ipv4(&ofp_tuple->src);
> +        tuple->dst.ip = in6_addr_get_mapped_ipv4(&ofp_tuple->dst);
> +    } else {
> +        tuple->src.in6 = ofp_tuple->src;
> +        tuple->dst.in6 = ofp_tuple->dst;
> +    }
> +
> +    tuple->l3_type = l3_type;
> +    tuple->ip_proto = ip_proto;
> +    tuple->src_port = ofp_tuple->src_port;
> +
> +    if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
> +        tuple->icmp_code = ofp_tuple->icmp_code;
> +        tuple->icmp_type = ofp_tuple->icmp_type;
> +    } else {
> +        tuple->dst_port = ofp_tuple->dst_port;
> +    }
> +}
> +
>  /* Dump one connection from a tracker, and put it in 'entry'.
>   *
>   * 'dump' should have been initialized by ct_dpif_dump_start().
> @@ -100,25 +126,79 @@ ct_dpif_dump_done(struct ct_dpif_dump_state *dump)
>              ? dpif->dpif_class->ct_dump_done(dpif, dump)
>              : EOPNOTSUPP);
>  }
> -
> +

Keep the line feed character.

> +static int
> +ct_dpif_flush_tuple(struct dpif *dpif, const uint16_t *zone,
> +                    const struct ofputil_ct_match *match) {

'{' should be on a separate line.

> +    struct ct_dpif_dump_state *dump;
> +    struct ct_dpif_entry cte;
> +    int error;
> +    int tot_bkts;
> +
> +    if (!dpif->dpif_class->ct_flush) {
> +        return EOPNOTSUPP;
> +    }
> +
> +    if (VLOG_IS_DBG_ENABLED()) {
> +        struct ds ds = DS_EMPTY_INITIALIZER;
> +        ofputil_ct_match_format(&ds, match);
> +        VLOG_DBG("%s: ct_flush:zone=%d %s", dpif_name(dpif), zone ? *zone : 
> 0,

s/flush:zone/flush: zone/

> +                 ds_cstr(&ds));
> +        ds_destroy(&ds);
> +    }
> +
> +    /* If we have full five tuple in original and empty reply tuple just
> +     * do the flush over original tuple directly. */
> +    if (ofputil_ct_tuple_is_five_tuple(&match->tuple_orig, match->ip_proto) 
> &&
> +        ofputil_ct_tuple_is_zero(&match->tuple_reply, match->ip_proto)) {
> +        struct ct_dpif_tuple tuple;

Empty line.

> +        ct_dpif_tuple_from_ofputil_ct_tuple(&match->tuple_orig, &tuple,
> +                                            match->l3_type, match->ip_proto);
> +        return dpif->dpif_class->ct_flush(dpif, zone, &tuple);
> +    }
> +
> +    error = ct_dpif_dump_start(dpif, &dump, zone, &tot_bkts);
> +    if (error) {
> +        return error;
> +    }
> +
> +    while (!(error = ct_dpif_dump_next(dump, &cte))) {
> +        if (zone && *zone != cte.zone) {
> +            continue;
> +        }
> +
> +        if (ofputil_ct_match_cmp(match, &cte)) {
> +            error = dpif->dpif_class->ct_flush(dpif, &cte.zone,
> +                                               &cte.tuple_orig);
> +            if (error) {
> +                break;
> +            }
> +        }
> +    }
> +    if (error == EOF) {
> +        error = 0;
> +    }
> +
> +    ct_dpif_dump_done(dump);
> +    return error;
> +}
> +
>  /* Flush the entries in the connection tracker used by 'dpif'.  The
>   * arguments have the following behavior:
>   *
> - *   - If both 'zone' and 'tuple' are NULL, flush all the conntrack entries.
> - *   - If 'zone' is not NULL, and 'tuple' is NULL, flush all the conntrack
> + *   - If both 'zone' is NULL and 'match' is NULL or zero,
> + *     flush all the conntrack entries.
> + *   - If 'zone' is not NULL, and 'match' is NULL, flush all the conntrack
>   *     entries in '*zone'.
> - *   - If 'tuple' is not NULL, flush the conntrack entry specified by 'tuple'
> + *   - If 'match' is not NULL or zero, flush the conntrack entry specified
> + *     by 'match'.

The sentense shouldn't stop here, I suppose.

>   *     in '*zone'. If 'zone' is NULL, use the default zone (zone 0). */
>  int
>  ct_dpif_flush(struct dpif *dpif, const uint16_t *zone,
> -              const struct ct_dpif_tuple *tuple)
> +              const struct ofputil_ct_match *match)
>  {
> -    if (tuple) {
> -        struct ds ds = DS_EMPTY_INITIALIZER;
> -        ct_dpif_format_tuple(&ds, tuple);
> -        VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif), 
> ds_cstr(&ds),
> -                                                zone ? *zone : 0);
> -        ds_destroy(&ds);
> +    if (match && !ofputil_is_ct_match_zero(match)) {
> +        return ct_dpif_flush_tuple(dpif, zone, match);
>      } else if (zone) {
>          VLOG_DBG("%s: ct_flush: zone %"PRIu16, dpif_name(dpif), *zone);
>      } else {
> @@ -126,7 +206,7 @@ ct_dpif_flush(struct dpif *dpif, const uint16_t *zone,
>      }
>  
>      return (dpif->dpif_class->ct_flush
> -            ? dpif->dpif_class->ct_flush(dpif, zone, tuple)
> +            ? dpif->dpif_class->ct_flush(dpif, zone, NULL)
>              : EOPNOTSUPP);
>  }
>  
> @@ -583,112 +663,6 @@ ct_dpif_format_tcp_stat(struct ds * ds, int tcp_state, 
> int conn_per_state)
>      ds_put_format(ds, "=%u", conn_per_state);
>  }
>  
> -/* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'.
> - * Returns true on success.  Otherwise, returns false and puts the error
> - * message in 'ds'. */
> -bool
> -ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, const char *s, struct ds 
> *ds)
> -{
> -    char *pos, *key, *value, *copy;
> -    memset(tuple, 0, sizeof *tuple);
> -
> -    pos = copy = xstrdup(s);
> -    while (ofputil_parse_key_value(&pos, &key, &value)) {
> -        if (!*value) {
> -            ds_put_format(ds, "field %s missing value", key);
> -            goto error;
> -        }
> -
> -        if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst")) {
> -            if (tuple->l3_type && tuple->l3_type != AF_INET) {
> -                ds_put_cstr(ds, "L3 type set multiple times");
> -                goto error;
> -            } else {
> -                tuple->l3_type = AF_INET;
> -            }
> -            if (!ip_parse(value, key[6] == 's' ? &tuple->src.ip :
> -                                                 &tuple->dst.ip)) {
> -                goto error_with_msg;
> -            }
> -        } else if (!strcmp(key, "ct_ipv6_src") ||
> -                   !strcmp(key, "ct_ipv6_dst")) {
> -            if (tuple->l3_type && tuple->l3_type != AF_INET6) {
> -                ds_put_cstr(ds, "L3 type set multiple times");
> -                goto error;
> -            } else {
> -                tuple->l3_type = AF_INET6;
> -            }
> -            if (!ipv6_parse(value, key[8] == 's' ? &tuple->src.in6 :
> -                                                   &tuple->dst.in6)) {
> -                goto error_with_msg;
> -            }
> -        } else if (!strcmp(key, "ct_nw_proto")) {
> -            char *err = str_to_u8(value, key, &tuple->ip_proto);
> -            if (err) {
> -                free(err);
> -                goto error_with_msg;
> -            }
> -        } else if (!strcmp(key, "ct_tp_src") || !strcmp(key,"ct_tp_dst")) {
> -            uint16_t port;
> -            char *err = str_to_u16(value, key, &port);
> -            if (err) {
> -                free(err);
> -                goto error_with_msg;
> -            }
> -            if (key[6] == 's') {
> -                tuple->src_port = htons(port);
> -            } else {
> -                tuple->dst_port = htons(port);
> -            }
> -        } else if (!strcmp(key, "icmp_type") || !strcmp(key, "icmp_code") ||
> -                   !strcmp(key, "icmp_id") ) {
> -            if (tuple->ip_proto != IPPROTO_ICMP &&
> -                tuple->ip_proto != IPPROTO_ICMPV6) {
> -                ds_put_cstr(ds, "invalid L4 fields");
> -                goto error;
> -            }
> -            uint16_t icmp_id;
> -            char *err;
> -            if (key[5] == 't') {
> -                err = str_to_u8(value, key, &tuple->icmp_type);
> -            } else if (key[5] == 'c') {
> -                err = str_to_u8(value, key, &tuple->icmp_code);
> -            } else {
> -                err = str_to_u16(value, key, &icmp_id);
> -                tuple->icmp_id = htons(icmp_id);
> -            }
> -            if (err) {
> -                free(err);
> -                goto error_with_msg;
> -            }
> -        } else {
> -            ds_put_format(ds, "invalid conntrack tuple field: %s", key);
> -            goto error;
> -        }
> -    }
> -
> -    if (ipv6_is_zero(&tuple->src.in6) || ipv6_is_zero(&tuple->dst.in6) ||
> -        !tuple->ip_proto) {
> -        /* icmp_type, icmp_code, and icmp_id can be 0. */
> -        if (tuple->ip_proto != IPPROTO_ICMP &&
> -            tuple->ip_proto != IPPROTO_ICMPV6) {
> -            if (!tuple->src_port || !tuple->dst_port) {
> -                ds_put_cstr(ds, "at least one of the conntrack 5-tuple 
> fields "
> -                                "is missing.");
> -                goto error;
> -            }
> -        }
> -    }
> -
> -    free(copy);
> -    return true;
> -
> -error_with_msg:
> -    ds_put_format(ds, "failed to parse field %s", key);
> -error:
> -    free(copy);
> -    return false;
> -}
>  
>  void
>  ct_dpif_push_zone_limit(struct ovs_list *zone_limits, uint16_t zone,
> diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h
> index 2848549b0..337c99dd4 100644
> --- a/lib/ct-dpif.h
> +++ b/lib/ct-dpif.h
> @@ -17,6 +17,7 @@
>  #ifndef CT_DPIF_H
>  #define CT_DPIF_H
>  
> +#include "openvswitch/ofp-util.h"

No need to include.  Just define a type.

>  #include "openvswitch/types.h"
>  #include "packets.h"
>  

struct ofputil_ct_match;

> @@ -285,7 +286,7 @@ int ct_dpif_dump_start(struct dpif *, struct 
> ct_dpif_dump_state **,
>  int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct ct_dpif_entry *);
>  int ct_dpif_dump_done(struct ct_dpif_dump_state *);
>  int ct_dpif_flush(struct dpif *, const uint16_t *zone,
> -                  const struct ct_dpif_tuple *);
> +                  const struct ofputil_ct_match *);
>  int ct_dpif_set_maxconns(struct dpif *dpif, uint32_t maxconns);
>  int ct_dpif_get_maxconns(struct dpif *dpif, uint32_t *maxconns);
>  int ct_dpif_get_nconns(struct dpif *dpif, uint32_t *nconns);
> @@ -311,7 +312,6 @@ void ct_dpif_format_ipproto(struct ds *ds, uint16_t 
> ipproto);
>  void ct_dpif_format_tuple(struct ds *, const struct ct_dpif_tuple *);
>  uint8_t ct_dpif_coalesce_tcp_state(uint8_t state);
>  void ct_dpif_format_tcp_stat(struct ds *, int, int);
> -bool ct_dpif_parse_tuple(struct ct_dpif_tuple *, const char *s, struct ds *);
>  void ct_dpif_push_zone_limit(struct ovs_list *, uint16_t zone, uint32_t 
> limit,
>                               uint32_t count);
>  void ct_dpif_free_zone_limits(struct ovs_list *);
> diff --git a/lib/dpctl.c b/lib/dpctl.c
> index 29041fa3e..a1f6a9a5d 100644
> --- a/lib/dpctl.c
> +++ b/lib/dpctl.c
> @@ -40,6 +40,7 @@
>  #include "netdev.h"
>  #include "netlink.h"
>  #include "odp-util.h"
> +#include "ofp-ct-util.h"
>  #include "openvswitch/ofpbuf.h"
>  #include "packets.h"
>  #include "openvswitch/shash.h"
> @@ -1707,37 +1708,55 @@ dpctl_flush_conntrack(int argc, const char *argv[],
>                        struct dpctl_params *dpctl_p)
>  {
>      struct dpif *dpif = NULL;
> -    struct ct_dpif_tuple tuple, *ptuple = NULL;
> +    struct ofputil_ct_match match = {0};
>      struct ds ds = DS_EMPTY_INITIALIZER;
>      uint16_t zone, *pzone = NULL;
>      int error;
>      int args = argc - 1;
>  
> -    /* Parse ct tuple */
> -    if (args && ct_dpif_parse_tuple(&tuple, argv[args], &ds)) {
> -        ptuple = &tuple;
> +    /* Parse zone */

Period at the end of a comment.

> +    if (args && !strncmp(argv[1], "zone=", 5)) {
> +        if (!ovs_scan(argv[1], "zone=%"SCNu16, &zone)) {
> +            ds_put_cstr(&ds, "failed to parse zone");
> +            error = EINVAL;
> +            goto error;
> +        }
> +        pzone = &zone;
>          args--;
>      }
>  
> -    /* Parse zone */
> -    if (args && ovs_scan(argv[args], "zone=%"SCNu16, &zone)) {
> -        pzone = &zone;
> +    /* Parse ct tuples */

Period at the end of a comment.

> +    for (int i = 0; i < 2; i++) {
> +        if (!args) {
> +            break;
> +        }
> +
> +        struct ofputil_ct_tuple *tuple =
> +            i ? &match.tuple_reply : &match.tuple_orig;
> +        const char *arg = argv[argc - args];
> +
> +        if (arg[0] && !ofputil_ct_tuple_parse(tuple, arg, &ds, 
> &match.ip_proto,
> +                                              &match.l3_type)) {
> +            error = EINVAL;
> +            goto error;
> +        }
>          args--;
>      }
>  
> -    /* Report error if there are more than one unparsed argument. */
> +    /* Report error if there is more than one unparsed argument. */
>      if (args > 1) {
>          ds_put_cstr(&ds, "invalid arguments");
>          error = EINVAL;
>          goto error;
>      }
>  
> -    error = opt_dpif_open(argc, argv, dpctl_p, 4, &dpif);
> +    error = opt_dpif_open(argc, argv, dpctl_p, 5, &dpif);
>      if (error) {
> +        dpctl_error(dpctl_p, error, "Cannot open dpif");
>          return error;
>      }
>  
> -    error = ct_dpif_flush(dpif, pzone, ptuple);
> +    error = ct_dpif_flush(dpif, pzone, &match);
>      if (!error) {
>          dpif_close(dpif);
>          return 0;
> @@ -2862,8 +2881,8 @@ static const struct dpctl_command all_commands[] = {
>        0, 1, dpctl_offload_stats_show, DP_RO },
>      { "dump-conntrack", "[-m] [-s] [dp] [zone=N]",
>        0, 4, dpctl_dump_conntrack, DP_RO },
> -    { "flush-conntrack", "[dp] [zone=N] [ct-tuple]", 0, 3,
> -      dpctl_flush_conntrack, DP_RW },
> +    { "flush-conntrack", "[dp] [zone=N] [ct-origin-tuple] [ct-reply-tuple]",
> +      0, 4, dpctl_flush_conntrack, DP_RW },
>      { "cache-get-size", "[dp]", 0, 1, dpctl_cache_get_size, DP_RO },
>      { "cache-set-size", "dp cache <size>", 3, 3, dpctl_cache_set_size, DP_RW 
> },
>      { "ct-stats-show", "[dp] [zone=N]",
> diff --git a/lib/dpctl.man b/lib/dpctl.man
> index 87ea8087b..587f45403 100644
> --- a/lib/dpctl.man
> +++ b/lib/dpctl.man
> @@ -302,22 +302,25 @@ are included. With \fB\-\-statistics\fR timeouts and 
> timestamps are
>  added to the output.
>  .
>  .TP
> -\*(DX\fBflush\-conntrack\fR [\fIdp\fR] [\fBzone=\fIzone\fR] [\fIct-tuple\fR]
> +\*(DX\fBflush\-conntrack\fR [\fIdp\fR] [\fBzone=\fIzone\fR] 
> [\fIct-origin-tuple\fR] [\fIct-reply-tuple\fR]
>  Flushes the connection entries in the tracker used by \fIdp\fR based on
> -\fIzone\fR and connection tracking tuple \fIct-tuple\fR.
> +\fIzone\fR and connection tracking tuple \fIct-origin-tuple\fR.
>  If \fIct-tuple\fR is not provided, flushes all the connection entries.
>  If \fBzone\fR=\fIzone\fR is specified, only flushes the connections in
>  \fIzone\fR.
>  .IP
> -If \fIct-tuple\fR is provided, flushes the connection entry specified by
> +If \fIct-[origin|reply]-tuple\fR is provided, flushes the connection entry 
> specified by

line length.

>  \fIct-tuple\fR in \fIzone\fR. The zone defaults to 0 if it is not provided.

these is no 'ct-tuple' argument.

>  The userspace connection tracker requires flushing with the original 
> pre-NATed
> -tuple and a warning log will be otherwise generated.
> -An example of an IPv4 ICMP \fIct-tuple\fR:
> +tuple and a warning log will be otherwise generated. The tuple can be partial
> +and will remove all connections that are matching on the specified fields.
> +In order to specify only \fIct-reply-tuple\fR provide empty string
> +as \fIct-origin-tuple\fR.
> +An example of an IPv4 ICMP \fIct-[origin|reply]-tuple\fR:
>  .IP
>  
> "ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=1,icmp_type=8,icmp_code=0,icmp_id=10"
>  .IP
> -An example of an IPv6 TCP \fIct-tuple\fR:
> +An example of an IPv6 TCP \fIct-[origin|reply]-tuple\fR:
>  .IP
>  
> "ct_ipv6_src=fc00::1,ct_ipv6_dst=fc00::2,ct_nw_proto=6,ct_tp_src=1,ct_tp_dst=2"
>  .
> diff --git a/lib/ofp-ct-util.c b/lib/ofp-ct-util.c
> new file mode 100644
> index 000000000..790d5dbec
> --- /dev/null
> +++ b/lib/ofp-ct-util.c
> @@ -0,0 +1,303 @@
> +
> +/* Copyright (c) 2022, Red Hat, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <sys/types.h>
> +#include <netinet/in.h>
> +#include <netinet/icmp6.h>
> +
> +#include "ct-dpif.h"
> +#include "ofp-ct-util.h"
> +#include "openvswitch/dynamic-string.h"
> +#include "openvswitch/ofp-parse.h"
> +#include "openvswitch/ofp-util.h"
> +#include "openvswitch/packets.h"
> +
> +static inline bool
> +ofputil_ct_inet_addr_cmp_partial(const struct in6_addr *partial,
> +                                 const union ct_dpif_inet_addr *addr,
> +                                 const uint16_t l3_type)
> +{
> +    if (ipv6_is_zero(partial)) {
> +        return true;
> +    }
> +
> +    if (l3_type == AF_INET && in6_addr_get_mapped_ipv4(partial) != addr->ip) 
> {
> +        return false;
> +    }
> +
> +    if (l3_type == AF_INET6 && !ipv6_addr_equals(partial, &addr->in6)) {
> +        return false;
> +    }
> +
> +    return true;
> +}
> +
> +static inline bool
> +ofputil_ct_tuple_ip_cmp_partial(const struct ofputil_ct_tuple *partial,
> +                                const struct ct_dpif_tuple *tuple,
> +                                const uint16_t l3_type, const uint8_t 
> ip_proto)
> +{
> +    if (!ofputil_ct_inet_addr_cmp_partial(&partial->src,
> +                                          &tuple->src, l3_type)) {
> +        return false;
> +    }
> +
> +    if (!ofputil_ct_inet_addr_cmp_partial(&partial->dst,
> +                                          &tuple->dst, l3_type)) {
> +        return false;
> +    }
> +
> +    if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
> +        if (partial->icmp_id != tuple->icmp_id) {
> +            return false;
> +        }
> +
> +        if (partial->icmp_type != tuple->icmp_type) {
> +            return false;
> +        }
> +
> +        if (partial->icmp_code != tuple->icmp_code) {
> +            return false;
> +        }
> +    } else {
> +        if (partial->src_port && partial->src_port != tuple->src_port) {
> +            return false;
> +        }
> +
> +        if (partial->dst_port && partial->dst_port != tuple->dst_port) {
> +            return false;
> +        }
> +    }
> +
> +    return true;
> +}
> +
> +/* Compares the non-zero members if they match. This is useful for clearing

Maybe 'Returns 'true' is all non-zero members of 'match' equal to corresponding
members of 'entry'.' ?

> + * up all connections specified by a partial tuples for orig/reply. */
> +bool
> +ofputil_ct_match_cmp(const struct ofputil_ct_match *match,
> +                     const struct ct_dpif_entry *entry)

'struct ofputil_ct_match' is an externally visible strucuture,
'struct ct_dpif_entry' is an OVS-only internal structure.
So, the function should live in lib/ct-dpif.c instead.  And it
only used there anyway, so can even be static.

> +{
> +    if (match->l3_type && match->l3_type != entry->tuple_orig.l3_type) {
> +        return false;
> +    }
> +
> +    if (match->ip_proto && match->ip_proto != entry->tuple_orig.ip_proto) {
> +        return false;
> +    }
> +
> +    if (!ofputil_ct_tuple_ip_cmp_partial(&match->tuple_orig,
> +                                         &entry->tuple_orig,
> +                                         match->l3_type, match->ip_proto)) {
> +        return false;
> +    }
> +
> +    if (!ofputil_ct_tuple_ip_cmp_partial(&match->tuple_reply,
> +                                         &entry->tuple_reply,
> +                                         match->l3_type, match->ip_proto)) {
> +        return false;
> +    }
> +
> +    return true;
> +}
> +
> +static void
> +ofputil_ct_tuple_format(struct ds *ds, const struct ofputil_ct_tuple *tuple,
> +                        uint8_t ip_proto, uint16_t l3_type)
> +{
> +    ds_put_cstr(ds, l3_type == AF_INET ? "ct_nw_src=": "ct_ipv6_src=");
> +    ipv6_format_mapped(&tuple->src, ds);
> +    ds_put_cstr(ds, l3_type == AF_INET ? ",ct_nw_dst=": ",ct_ipv6_dst=");
> +    ipv6_format_mapped(&tuple->dst, ds);
> +    if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
> +        ds_put_format(ds, ",icmp_id=%u,icmp_type=%u,icmp_code=%u",
> +                      ntohs(tuple->icmp_id), tuple->icmp_type,
> +                      tuple->icmp_code);
> +
> +    } else {
> +        ds_put_format(ds, ",ct_tp_src=%u,ct_tp_dst=%u", 
> ntohs(tuple->src_port),
> +                      ntohs(tuple->dst_port));
> +    }
> +}
> +
> +bool
> +ofputil_ct_tuple_is_zero(const struct ofputil_ct_tuple *tuple, uint8_t 
> ip_proto)
> +{
> +    bool is_zero = ipv6_is_zero(&tuple->src) && ipv6_is_zero(&tuple->dst);
> +
> +    if (!(ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6)) {
> +        is_zero = is_zero && !tuple->src_port && !tuple->dst_port;
> +    }
> +
> +    return is_zero;

Do we require ICMP tuples to always have at least one address specified?
Or should the 'if' above have an 'else { return false; }' ?

> +}
> +
> +bool
> +ofputil_ct_tuple_is_five_tuple(const struct ofputil_ct_tuple *tuple,
> +                               uint8_t ip_proto)
> +{
> +    /* First check if we have address. */
> +    bool five_tuple = !ipv6_is_zero(&tuple->src) && 
> !ipv6_is_zero(&tuple->dst);
> +
> +    if (!(ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6)) {
> +        five_tuple = five_tuple && tuple->src_port && tuple->dst_port;
> +    }
> +
> +    return five_tuple;
> +}
> +
> +bool
> +ofputil_is_ct_match_zero(const struct ofputil_ct_match *match)
> +{
> +    return !match->ip_proto && !match->l3_type &&
> +           ofputil_ct_tuple_is_zero(&match->tuple_orig, match->ip_proto) &&
> +           ofputil_ct_tuple_is_zero(&match->tuple_reply, match->ip_proto);
> +}
> +
> +void
> +ofputil_ct_match_format(struct ds *ds, const struct ofputil_ct_match *match)
> +{
> +    ds_put_cstr(ds, "'");
> +    ofputil_ct_tuple_format(ds, &match->tuple_orig, match->ip_proto,
> +                            match->l3_type);
> +    ds_put_format(ds, ",ct_nw_proto=%u' '", match->ip_proto);
> +    ofputil_ct_tuple_format(ds, &match->tuple_reply, match->ip_proto,
> +                            match->l3_type);
> +    ds_put_cstr(ds, "'");
> +}
> +
> +/* Parses a specification of a conntrack 5-tuple from 's' into 'tuple'.
> + * Returns true on success.  Otherwise, returns false and puts the error
> + * message in 'ds'. */
> +bool
> +ofputil_ct_tuple_parse(struct ofputil_ct_tuple *tuple, const char *s,
> +                       struct ds *ds, uint8_t *ip_proto, uint16_t *l3_type)
> +{
> +    char *pos, *key, *value, *copy;

An empty line here.

> +    pos = copy = xstrdup(s);
> +    while (ofputil_parse_key_value(&pos, &key, &value)) {
> +        if (!*value) {
> +            ds_put_format(ds, "field %s missing value", key);
> +            goto error;
> +        }
> +
> +        if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst")) {
> +            struct in6_addr *addr = key[6] == 's' ? &tuple->src : 
> &tuple->dst;
> +
> +            if (*l3_type && *l3_type != AF_INET) {
> +                ds_put_format(ds ,"the L3 protocol does not match %s", 
> value);
> +                goto error;
> +            }
> +
> +            if (!ipv6_is_zero(addr)) {
> +                ds_put_format(ds, "%s is set multiple times", key);
> +                goto error;
> +            }
> +
> +            ovs_be32 ip = 0;
> +            if (!ip_parse(value, &ip)) {
> +                goto error_with_msg;
> +            }
> +
> +            *l3_type = AF_INET;
> +            *addr = in6_addr_mapped_ipv4(ip);
> +        } else if (!strcmp(key, "ct_ipv6_src") ||
> +                   !strcmp(key, "ct_ipv6_dst")) {
> +            struct in6_addr *addr = key[8] == 's' ? &tuple->src : 
> &tuple->dst;
> +
> +            if (*l3_type &&  *l3_type != AF_INET6) {

Too many spaces.

> +                ds_put_format(ds ,"the L3 protocol does not match %s", 
> value);

s/ds ,/ds, /

> +                goto error;
> +            }
> +
> +            if (!ipv6_is_zero(addr)) {
> +                ds_put_format(ds, "%s is set multiple times", key);
> +                goto error;
> +            }
> +
> +

Too many empty lines.

> +            if (!ipv6_parse(value, addr)) {
> +                goto error_with_msg;
> +            }
> +
> +            *l3_type = AF_INET6;
> +        } else if (!strcmp(key, "ct_nw_proto")) {
> +            if (*ip_proto) {
> +                ds_put_format(ds, "%s is set multiple times", key);
> +            }
> +            char *err = str_to_u8(value, key, ip_proto);
> +
> +            if (err) {
> +                free(err);
> +                goto error_with_msg;
> +            }
> +        } else if (!strcmp(key, "ct_tp_src") || !strcmp(key, "ct_tp_dst")) {
> +            uint16_t port;
> +            char *err = str_to_u16(value, key, &port);
> +
> +            if (err) {
> +                free(err);
> +                goto error_with_msg;
> +            }
> +            if (key[6] == 's') {
> +                tuple->src_port = htons(port);
> +            } else {
> +                tuple->dst_port = htons(port);
> +            }
> +        } else if (!strcmp(key, "icmp_type") || !strcmp(key, "icmp_code") ||
> +                   !strcmp(key, "icmp_id")) {
> +            if (*ip_proto != IPPROTO_ICMP && *ip_proto != IPPROTO_ICMPV6) {
> +                ds_put_cstr(ds, "invalid L4 fields");
> +                goto error;
> +            }
> +            uint16_t icmp_id;
> +            char *err;
> +
> +            if (key[5] == 't') {
> +                err = str_to_u8(value, key, &tuple->icmp_type);
> +            } else if (key[5] == 'c') {
> +                err = str_to_u8(value, key, &tuple->icmp_code);
> +            } else {
> +                err = str_to_u16(value, key, &icmp_id);
> +                tuple->icmp_id = htons(icmp_id);
> +            }
> +            if (err) {
> +                free(err);
> +                goto error_with_msg;
> +            }
> +        } else {
> +            ds_put_format(ds, "invalid conntrack tuple field: %s", key);
> +            goto error;
> +        }
> +    }
> +
> +    if (!*ip_proto && (tuple->src_port || tuple->dst_port)) {
> +        ds_put_cstr(ds, "port is set without protocol");
> +        goto error;
> +    }
> +
> +    free(copy);
> +    return true;
> +
> +error_with_msg:
> +    ds_put_format(ds, "failed to parse field %s", key);
> +error:
> +    free(copy);
> +    return false;
> +}
> diff --git a/lib/ofp-ct-util.h b/lib/ofp-ct-util.h
> new file mode 100644
> index 000000000..1a219787d
> --- /dev/null
> +++ b/lib/ofp-ct-util.h
> @@ -0,0 +1,40 @@
> +/* Copyright (c) 2022, Red Hat, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef OVS_OFP_CT_UTIL_H
> +#define OVS_OFP_CT_UTIL_H
> +
> +#include "ct-dpif.h"
> +#include "openvswitch/ofp-util.h"
> +
> +bool ofputil_ct_match_cmp(const struct ofputil_ct_match *match,
> +                                 const struct ct_dpif_entry *entry);
> +
> +bool ofputil_ct_tuple_is_zero(const struct ofputil_ct_tuple *tuple,
> +                              uint8_t ip_proto);
> +
> +bool ofputil_ct_tuple_is_five_tuple(const struct ofputil_ct_tuple *tuple,
> +                                    uint8_t ip_proto);
> +
> +void ofputil_ct_match_format(struct ds *ds,
> +                             const struct ofputil_ct_match *match);
> +
> +bool ofputil_ct_tuple_parse(struct ofputil_ct_tuple *tuple, const char *s,
> +                            struct ds *ds, uint8_t *ip_proto,
> +                            uint16_t *l3_type);
> +
> +bool ofputil_is_ct_match_zero(const struct ofputil_ct_match *match);
> +
> +#endif /* lib/ofp-ct-util.h */
> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> index 08c78ff57..e7ec1d96b 100644
> --- a/tests/system-traffic.at
> +++ b/tests/system-traffic.at
> @@ -2278,7 +2278,7 @@ 
> udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
>  
> -AT_SETUP([conntrack - ct flush by 5-tuple])
> +AT_SETUP([conntrack - ct flush])
>  CHECK_CONNTRACK()
>  OVS_TRAFFIC_VSWITCHD_START()
>  
> @@ -2339,6 +2339,106 @@ AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 
> $ICMP_TUPLE])
>  AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.2,"], 
> [1], [dnl
>  ])
>  
> +dnl Test UDP from port 1 and 2, partial flush by src port
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000
>  actions=resubmit(,0)"])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000
>  actions=resubmit(,0)"])
> +
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0], 
> [dnl
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_src=1'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_src=2'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> +
> +dnl Test UDP from port 1 and 2, partial flush by dst port
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000
>  actions=resubmit(,0)"])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000
>  actions=resubmit(,0)"])
> +
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0], 
> [dnl
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_dst=2'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_proto=17,ct_tp_dst=1'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> +
> +dnl Test UDP from port 1 and 2, partial flush by src address
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000
>  actions=resubmit(,0)"])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000
>  actions=resubmit(,0)"])
> +
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0], 
> [dnl
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.1'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.2'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> +
> +dnl Test UDP from port 1 and 2, partial flush by dst address
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000
>  actions=resubmit(,0)"])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000
>  actions=resubmit(,0)"])
> +
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0], 
> [dnl
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.2'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_dst=10.1.1.1'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> +
> +dnl Test UDP from port 1 and 2, partial flush by src address in reply 
> direction
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000
>  actions=resubmit(,0)"])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 
> packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000
>  actions=resubmit(,0)"])
> +
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0], 
> [dnl
> +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack '' 'ct_nw_src=10.1.1.2'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [0], [dnl
> +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
> +])
> +
> +AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 '' 'ct_nw_src=10.1.1.1'])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1])
> +
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
>  


_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Reply via email to