On Mon, Apr 13, 2020 at 4:32 PM Yi-Hung Wei <yihung....@gmail.com> wrote:
>
> On Tue, Apr 7, 2020 at 2:08 PM William Tu <u9012...@gmail.com> wrote:
> >
> > Commit 1f1613183733 ("ct-dpif, dpif-netlink: Add conntrack timeout
> > policy support") adds conntrack timeout policy for kernel datapath.
> > This patch enables support for the userspace datapath.  I tested
> > using the 'make check-system-userspace' which checks the timeout
> > policies for ICMP and UDP cases.
> >
> > Signed-off-by: William Tu <u9012...@gmail.com>
>
> Thanks William for implementing this feature in the userspace. I have
> some comments as below.
>
>
> > diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
> > index b3507bd1c7fa..ffcda37f9985 100644
> > --- a/Documentation/faq/releases.rst
> > +++ b/Documentation/faq/releases.rst
> > @@ -118,7 +118,7 @@ Q: Are all features available with all datapaths?
> >      ========================== ============== ============== ========= 
> > =======
> >      Connection tracking             4.3            2.5          2.6      
> > YES
> >      Conntrack Fragment Reass.       4.3            2.6          2.12     
> > YES
> > -    Conntrack Timeout Policies      5.2            2.12         NO       NO
> > +    Conntrack Timeout Policies      5.2            2.12         2.13     NO
>
> For the support of userspace timeout policies in userspace datapath,
> should it be 2.14?
>
Yes, thanks!

>
>
> > diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
> > index 9a8ca3910157..852482303684 100644
> > --- a/lib/conntrack-private.h
> > +++ b/lib/conntrack-private.h
> > @@ -118,6 +118,8 @@ struct conn {
> >      /* Immutable data. */
> >      bool alg_related; /* True if alg data connection. */
> >      enum ct_conn_type conn_type;
> > +
> > +    uint32_t tpid; /* Timeout policy ID. */
>
> I have a minor suggestion here.  I would recommend to replace tpid
> with tp_id so that when we grep tp_it it would not be confused with
> vlan's tpid (tunnel protocol id) field.

OK, make sense.

>
>
> > @@ -143,9 +145,9 @@ enum ct_update_res {
> >      CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \
> >      CT_TIMEOUT(ICMP_REPLY, 30 * 1000)
> >
> > -/* The smallest of the above values: it is used as an upper bound for the
> > - * interval between two rounds of cleanup of expired entries */
> > -#define CT_TM_MIN (30 * 1000)
> > +/* This is used as an upper bound for the interval between two
> > + * rounds of cleanup of expired entries */
> > +#define CT_TM_MIN (1 * 1000)
>
> I understand that while this patch enables users to customize the
> timeout policy, users can set the minimum timeout configuration to 1
> second. Thus, updates CT_TM_MIN to 1 second would make the conntrack
> clean thread to check timer expiration every single second to meet
> that requirement.  However, this may increase the system load even
> without any customized timeout policy.  I would recommend to update
> CT_TM_MIN dynamically to the minimum customized timeout value on the
> fly.

ok, will do it in next version.
>
>
> > diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
> > new file mode 100644
> > index 000000000000..6be4b8276340
> > --- /dev/null
> > +++ b/lib/conntrack-tp.c
> > @@ -0,0 +1,247 @@
> > +/*
> > + * Copyright (c) 2020 VMware, Inc.
> > + *
> > + * Licensed under the Apache License, Version 2.0 (the "License");
> > + * you may not use this file except in compliance with the License.
> > + * You may obtain a copy of the License at:
> > + *
> > + *     http://www.apache.org/licenses/LICENSE-2.0
> > + *
> > + * Unless required by applicable law or agreed to in writing, software
> > + * distributed under the License is distributed on an "AS IS" BASIS,
> > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> > + * See the License for the specific language governing permissions and
> > + * limitations under the License.
> > + */
> > +
> > +#include <config.h>
> > +
> > +#include "conntrack-private.h"
> > +#include "conntrack-tp.h"
> > +#include "dp-packet.h"
> > +
> > +#include "openvswitch/vlog.h"
> > +VLOG_DEFINE_THIS_MODULE(conntrack_tp);
> > +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> > +
> > +static const char *ct_timeout_str[] = {
> > +#define CT_TIMEOUT(NAME, VALUE) #NAME
> > +    CT_TIMEOUTS
> > +#undef CT_TIMEOUT
> > +};
> > +
> > +static inline bool
> > +check_present_and_set(struct timeout_policy *tp, uint32_t *v,
> > +                      enum ct_dpif_tp_attr attr)
> > +{
> > +    if (tp && (tp->p.present & (1 << attr))) {
> > +        *v = tp->p.attrs[attr];
> > +        return true;
> > +    }
> > +    return false;
> > +}
> > +
> > +#define TP_HAS_FUNC(ATTR)  \
> > +static bool \
> > +tp_has_##ATTR(struct timeout_policy *tp, uint32_t *v)              \
> > +{                                                                  \
> > +    return check_present_and_set(tp, v, CT_DPIF_TP_ATTR_##ATTR);   \
> > +}
> > +
> > +/* These functions check whether the timeout value is set from the
> > + * present bit.  If true, then set the value to '*v'.  For meaning
> > + * of each value, see CT_Timeout_Policy table at ovs-vswitchd.conf.db(5).
> > + */
> > +TP_HAS_FUNC(TCP_SYN_SENT)
> > +TP_HAS_FUNC(TCP_SYN_RECV)
> > +TP_HAS_FUNC(TCP_ESTABLISHED)
> > +TP_HAS_FUNC(TCP_FIN_WAIT)
> > +TP_HAS_FUNC(TCP_TIME_WAIT)
> > +TP_HAS_FUNC(TCP_CLOSE)
> > +TP_HAS_FUNC(UDP_FIRST)
> > +TP_HAS_FUNC(UDP_SINGLE)
> > +TP_HAS_FUNC(UDP_MULTIPLE)
> > +TP_HAS_FUNC(ICMP_FIRST)
> > +TP_HAS_FUNC(ICMP_REPLY)
> > +
> > +static bool
> > +conn_update_expiration_with_policy(struct conntrack *ct, struct conn *conn,
> > +                                   enum ct_timeout tm, long long now,
> > +                                   struct timeout_policy *tp)
> > +{
> > +    uint32_t val;
> > +
> > +    switch (tm) {
> > +    case CT_TM_TCP_FIRST_PACKET:
> > +        if (tp_has_TCP_SYN_SENT(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_OPENING:
> > +        if (tp_has_TCP_SYN_RECV(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_ESTABLISHED:
> > +        if (tp_has_TCP_ESTABLISHED(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_CLOSING:
> > +        if (tp_has_TCP_FIN_WAIT(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_FIN_WAIT:
> > +        if (tp_has_TCP_TIME_WAIT(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_CLOSED:
> > +        if (tp_has_TCP_CLOSE(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_OTHER_FIRST:
> > +        if (tp_has_UDP_FIRST(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_OTHER_BIDIR:
> > +        if (tp_has_UDP_SINGLE(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_OTHER_MULTIPLE:
> > +        if (tp_has_UDP_MULTIPLE(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_ICMP_FIRST:
> > +        if (tp_has_ICMP_FIRST(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_ICMP_REPLY:
> > +        if (tp_has_ICMP_REPLY(tp, &val)) {
> > +            goto update_with_val;
> > +        }
> > +        break;
> > +    case N_CT_TM:
> > +    default:
> > +        OVS_NOT_REACHED();
> > +        break;
> > +    }
> > +    return false;
> > +
> > +update_with_val:
> > +    conn_update_expiration(ct, conn, tm, now, val, false);
> > +    return true;
> > +}
> > +
> > +static bool
> > +conn_init_expiration_with_policy(struct conntrack *ct, struct conn *conn,
> > +                                 enum ct_timeout tm, long long now,
> > +                                 struct timeout_policy *tp)
> > +{
> > +    uint32_t val;
> > +
> > +    switch (tm) {
> > +    case CT_TM_TCP_FIRST_PACKET:
> > +        if (tp_has_TCP_SYN_SENT(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_OPENING:
> > +        if (tp_has_TCP_SYN_RECV(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_ESTABLISHED:
> > +        if (tp_has_TCP_ESTABLISHED(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_CLOSING:
> > +        if (tp_has_TCP_FIN_WAIT(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_FIN_WAIT:
> > +        if (tp_has_TCP_TIME_WAIT(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_TCP_CLOSED:
> > +        if (tp_has_TCP_CLOSE(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_OTHER_FIRST:
> > +        if (tp_has_UDP_FIRST(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_OTHER_BIDIR:
> > +        if (tp_has_UDP_SINGLE(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_OTHER_MULTIPLE:
> > +        if (tp_has_UDP_MULTIPLE(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_ICMP_FIRST:
> > +        if (tp_has_ICMP_FIRST(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case CT_TM_ICMP_REPLY:
> > +        if (tp_has_ICMP_REPLY(tp, &val)) {
> > +            goto init_with_val;
> > +        }
> > +        break;
> > +    case N_CT_TM:
> > +    default:
> > +        OVS_NOT_REACHED();
> > +        break;
> > +    }
> > +    return false;
> > +
> > +init_with_val:
> > +    conn_init_expiration(ct, conn, tm, now, val, false);
> > +    return true;
> > +}
> > +
> > +void
> > +conn_init_expiration_with_tp(struct conntrack *ct, struct conn *conn,
> > +                             enum ct_timeout tm, long long now)
> > +{
> > +    struct timeout_policy *tp;
> > +
> > +    tp = timeout_policy_lookup(ct, conn->tpid);
> > +    if (tp && conn_init_expiration_with_policy(ct, conn, tm, now, tp)) {
> > +        VLOG_DBG_RL(&rl, "Init timeout %s with policy.",
> > +                    ct_timeout_str[tm]);
> > +    } else {
> > +        /* Init with default. */
> > +        conn_init_expiration(ct, conn, tm, now, 0, true);
> > +    }
> > +}
> > +
> > +void
> > +conn_update_expiration_with_tp(struct conntrack *ct, struct conn *conn,
> > +                               enum ct_timeout tm, long long now)
> > +{
> > +    struct timeout_policy *tp;
> > +
> > +    tp = timeout_policy_lookup(ct, conn->tpid);
> > +    if (tp && conn_update_expiration_with_policy(ct, conn, tm, now, tp)) {
> > +        VLOG_DBG_RL(&rl, "Update timeout %s with policy.",
> > +                    ct_timeout_str[tm]);
> > +    } else {
> > +        /* Update with default. */
> > +        conn_update_expiration(ct, conn, tm, now, 0, true);
> > +    }
> > +}
>
>
> > diff --git a/lib/conntrack-tp.h b/lib/conntrack-tp.h
> > new file mode 100644
> > index 000000000000..34048ac2aeea
> > --- /dev/null
> > +++ b/lib/conntrack-tp.h
> > @@ -0,0 +1,24 @@
> > +/*
> > + * Copyright (c) 2020 VMware, Inc.
> > + *
> > + * Licensed under the Apache License, Version 2.0 (the "License");
> > + * you may not use this file except in compliance with the License.
> > + * You may obtain a copy of the License at:
> > + *
> > + *     http://www.apache.org/licenses/LICENSE-2.0
> > + *
> > + * Unless required by applicable law or agreed to in writing, software
> > + * distributed under the License is distributed on an "AS IS" BASIS,
> > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> > + * See the License for the specific language governing permissions and
> > + * limitations under the License.
> > + */
> > +
> > +#ifndef CONNTRACK_TP_H
> > +#define CONNTRACK_TP_H 1
> > +
> > +void conn_init_expiration_with_tp(struct conntrack *ct, struct conn *conn,
> > +                                  enum ct_timeout tm, long long now);
> > +void conn_update_expiration_with_tp(struct conntrack *ct, struct conn 
> > *conn,
> > +                                    enum ct_timeout tm, long long now);
>
> I think introduce the _with_tp() version may cause confusion to choose
> the *with_tp() API or the existing API. Since our goal is to use
> customized timeout if it is available. I would suggest the following
> two change options.
>
> 1) Since both conn_init_expiration_with_tp() and
> conn_update_expiration_with_tp() did a lookup() on the timeout policy
> before init or udpate the expiration time. Maybe we can abstrct the
> lookup logic as a seperate function, and call the abstract funcion in
> existing conn_init_expiration() and conn_update_expiration()?
>
> 2) The other options is to maybe to rename existing
> conn_update_expiration() to conn_update_expiration__() and rename
> conn_update_expiration_with_tp() to conn_update_expiration()? Both
> option will avoid changes in the L4 specific conntrack code and have
> less confustion about whether to use the _with_tp() API or the
> existing API.
>
OK I will rename the existing one to conn_update_expiration__()

>
> > diff --git a/lib/conntrack.c b/lib/conntrack.c
> > index 0cbc8f6d2b25..de934bdf7169 100644
> > --- a/lib/conntrack.c
> > +++ b/lib/conntrack.c
> > @@ -430,6 +437,139 @@ zone_limit_delete(struct conntrack *ct, uint16_t zone)
> >      return 0;
> >  }
> >
> > +struct timeout_policy *
> > +timeout_policy_get(struct conntrack *ct, int32_t tpid)
> > +{
> > +    struct timeout_policy *tp;
> > +
> > +    ovs_mutex_lock(&ct->ct_lock);
> > +    tp = timeout_policy_lookup(ct, tpid);
> > +    if (!tp) {
> > +        ovs_mutex_unlock(&ct->ct_lock);
> > +        return NULL;
> > +    }
> > +
> > +    ovs_mutex_unlock(&ct->ct_lock);
> > +    return tp;
> > +}
> > +
> > +struct timeout_policy *
> > +timeout_policy_lookup(struct conntrack *ct, int32_t tpid)
> > +    OVS_REQUIRES(ct->ct_lock)
> > +{
> > +    struct timeout_policy *tp;
> > +    uint32_t hash;
> > +
> > +    hash = zone_key_hash(tpid, ct->hash_basis);
> > +    HMAP_FOR_EACH_IN_BUCKET (tp, node, hash, &ct->timeout_policies) {
> > +        if (tp->p.id == tpid) {
> > +            return tp;
> > +        }
> > +    }
> > +    return NULL;
> > +}
> > +
> > +static bool
> > +is_valid_tpid(uint32_t tpid OVS_UNUSED)
> > +{
> > +    return true;
> > +}
>
> We can get rid of this function.
>
>
> > +
> > +static int
> > +timeout_policy_create(struct conntrack *ct,
> > +                      struct timeout_policy *new_tp)
> > +    OVS_REQUIRES(ct->ct_lock)
> > +{
> > +    uint32_t tpid = new_tp->p.id;
> > +    uint32_t hash;
> > +
> > +    if (is_valid_tpid(tpid)) {
> > +        struct timeout_policy *tp;
> > +
> > +        tp = xzalloc(sizeof *tp);
> > +        memcpy(&tp->p, &new_tp->p, sizeof tp->p);
>
> Instead of directly copy 'new_tp' into 'tp', if we can fill in the
> default values for the unpresent fields in the configuration time.
> We can reduce the effort to lookup the default vaule at run time in
> conntrack-tp.c.  Same thing can apply to the update function.

I see, looks like a better design.

>
>
> > @@ -1540,14 +1689,14 @@ conntrack_clean(struct conntrack *ct, long long now)
> >   * - We want to reduce the number of wakeups and batch connection cleanup
> >   *   when the load is not very high.  CT_CLEAN_INTERVAL ensures that if we
> >   *   are coping with the current cleanup tasks, then we wait at least
> > - *   5 seconds to do further cleanup.
> > + *   1 seconds to do further cleanup.
> >   *
> >   * - We don't want to keep the map locked too long, as we might prevent
> >   *   traffic from flowing.  CT_CLEAN_MIN_INTERVAL ensures that if cleanup 
> > is
> >   *   behind, there is at least some 200ms blocks of time when the map will 
> > be
> >   *   left alone, so the datapath can operate unhindered.
> >   */
> > -#define CT_CLEAN_INTERVAL 5000 /* 5 seconds */
> > +#define CT_CLEAN_INTERVAL 1000 /* 1 second */
>
> Not sure if we need to change CT_CLEAN_INTERVAL from 5 to 1, given
> that we set CT_TM_MIN dynamically.
>
>
> > diff --git a/lib/conntrack.h b/lib/conntrack.h
> > index b0d0fc8d9597..a55609bb2907 100644
> > --- a/lib/conntrack.h
> > +++ b/lib/conntrack.h
> > @@ -20,6 +20,7 @@
> >  #include <stdbool.h>
> >
> >  #include "cmap.h"
> > +#include "ct-dpif.h"
> >  #include "latch.h"
> >  #include "odp-netlink.h"
> >  #include "openvswitch/hmap.h"
> > @@ -93,7 +94,7 @@ int conntrack_execute(struct conntrack *ct, struct 
> > dp_packet_batch *pkt_batch,
> >                        const struct ovs_key_ct_labels *setlabel,
> >                        ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
> >                        const struct nat_action_info_t *nat_action_info,
> > -                      long long now);
> > +                      long long now, uint32_t tpid);
> >  void conntrack_clear(struct dp_packet *packet);
> >
> >  struct conntrack_dump {
> > @@ -111,6 +112,11 @@ struct conntrack_zone_limit {
> >      uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */
> >  };
> >
> > +struct timeout_policy {
> > +    struct hmap_node node;
> > +    struct ct_dpif_timeout_policy p;
>
> It seems to be more clear to me to name it as 'tp' than 'p'.
>
>
> > @@ -139,5 +145,11 @@ struct conntrack_zone_limit zone_limit_get(struct 
> > conntrack *ct,
> >                                             int32_t zone);
> >  int zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit);
> >  int zone_limit_delete(struct conntrack *ct, uint16_t zone);
> > +int timeout_policy_update(struct conntrack *ct, struct timeout_policy *tp);
> > +int timeout_policy_delete(struct conntrack *ct, uint32_t tpid);
> > +struct timeout_policy *timeout_policy_get(struct conntrack *ct, int32_t 
> > tpid);
> > +struct timeout_policy *timeout_policy_lookup(struct conntrack *ct,
> > +                                             int32_t tpid);
> > +
> >
> >  #endif /* conntrack.h */
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > index e456cc9befbe..41e805a832c8 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -7337,6 +7337,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch 
> > *packets_,
> >          bool commit = false;
> >          unsigned int left;
> >          uint16_t zone = 0;
> > +        uint32_t tpid = 0;
> >          const char *helper = NULL;
> >          const uint32_t *setmark = NULL;
> >          const struct ovs_key_ct_labels *setlabel = NULL;
> > @@ -7372,8 +7373,9 @@ dp_execute_cb(void *aux_, struct dp_packet_batch 
> > *packets_,
> >                   * netlink events. */
> >                  break;
> >              case OVS_CT_ATTR_TIMEOUT:
> > -                /* Userspace datapath does not support customized timeout
> > -                 * policy yet. */
> > +                if (!str_to_uint(nl_attr_get_string(b), 10, &tpid)) {
> > +                    VLOG_WARN("Invalid tpid %s.", nl_attr_get_string(b));
> > +                }
>
> Looks like if the OVS_CT_ATTR_TIMEOUT is not present, we will always
> use timeout policy 0 rather than the default timeout policy, since
> from ofproto-dpif.c the tiemout policy id pools allocate id number
> from starting from 0.  One simple fix may be to adjust the timeout
> policy id pool (backer->tpids) to start from 1, so that userspace can
> use 0 as the reserved default timeout policy.  Kernel space is
> agnostic to the id pool starting number.
>
thanks!
William
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to