Commit 1f1613183733 ("ct-dpif, dpif-netlink: Add conntrack timeout policy support") adds conntrack timeout policy for kernel datapath. This patch enables support for the userspace datapath. I tested using the 'make check-system-userspace' which checks the timeout policies for ICMP and UDP cases.
Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/671278549 Signed-off-by: William Tu <u9012...@gmail.com> --- Documentation/faq/releases.rst | 2 +- NEWS | 2 + lib/automake.mk | 2 + lib/conntrack-icmp.c | 7 +- lib/conntrack-other.c | 9 +- lib/conntrack-private.h | 27 +++-- lib/conntrack-tcp.c | 22 ++-- lib/conntrack-tp.c | 246 +++++++++++++++++++++++++++++++++++++++ lib/conntrack-tp.h | 24 ++++ lib/conntrack.c | 159 ++++++++++++++++++++++++- lib/conntrack.h | 14 ++- lib/dpif-netdev.c | 74 ++++++++++-- tests/system-traffic.at | 3 +- tests/system-userspace-macros.at | 6 +- tests/test-conntrack.c | 6 +- 15 files changed, 561 insertions(+), 42 deletions(-) create mode 100644 lib/conntrack-tp.c create mode 100644 lib/conntrack-tp.h diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index b3507bd1c7fa..ffcda37f9985 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -118,7 +118,7 @@ Q: Are all features available with all datapaths? ========================== ============== ============== ========= ======= Connection tracking 4.3 2.5 2.6 YES Conntrack Fragment Reass. 4.3 2.6 2.12 YES - Conntrack Timeout Policies 5.2 2.12 NO NO + Conntrack Timeout Policies 5.2 2.12 2.13 NO Conntrack Zone Limit 4.18 2.10 2.13 YES Conntrack NAT 4.6 2.6 2.8 YES Tunnel - LISP NO 2.11 NO NO diff --git a/NEWS b/NEWS index 70bd17584594..1e6af8f57bdd 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,8 @@ Post-v2.13.0 * Deprecated DPDK ring ports (dpdkr) are no longer supported. - Linux datapath: * Support for kernel versions up to 5.5.x. + - Userspace datapath: + * Add support for conntrack zone-based timeout policy. v2.13.0 - 14 Feb 2020 diff --git a/lib/automake.mk b/lib/automake.mk index 95925b57c351..86940ccd2f9e 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -53,6 +53,8 @@ lib_libopenvswitch_la_SOURCES = \ lib/conntrack-icmp.c \ lib/conntrack-private.h \ lib/conntrack-tcp.c \ + lib/conntrack-tp.c \ + lib/conntrack-tp.h \ lib/conntrack-other.c \ lib/conntrack.c \ lib/conntrack.h \ diff --git a/lib/conntrack-icmp.c b/lib/conntrack-icmp.c index 63246f0124d0..81393a6f1846 100644 --- a/lib/conntrack-icmp.c +++ b/lib/conntrack-icmp.c @@ -22,6 +22,7 @@ #include <netinet/icmp6.h> #include "conntrack-private.h" +#include "conntrack-tp.h" #include "dp-packet.h" enum OVS_PACKED_ENUM icmp_state { @@ -51,7 +52,8 @@ icmp_conn_update(struct conntrack *ct, struct conn *conn_, { struct conn_icmp *conn = conn_icmp_cast(conn_); conn->state = reply ? ICMPS_REPLY : ICMPS_FIRST; - conn_update_expiration(ct, &conn->up, icmp_timeouts[conn->state], now); + conn_update_expiration_with_tp(ct, &conn->up, icmp_timeouts[conn->state], + now); return CT_UPDATE_VALID; } @@ -80,7 +82,8 @@ icmp_new_conn(struct conntrack *ct, struct dp_packet *pkt OVS_UNUSED, { struct conn_icmp *conn = xzalloc(sizeof *conn); conn->state = ICMPS_FIRST; - conn_init_expiration(ct, &conn->up, icmp_timeouts[conn->state], now); + conn_init_expiration_with_tp(ct, &conn->up, icmp_timeouts[conn->state], + now); return &conn->up; } diff --git a/lib/conntrack-other.c b/lib/conntrack-other.c index de22ef87cc19..cabd51964c08 100644 --- a/lib/conntrack-other.c +++ b/lib/conntrack-other.c @@ -17,6 +17,7 @@ #include <config.h> #include "conntrack-private.h" +#include "conntrack-tp.h" #include "dp-packet.h" enum OVS_PACKED_ENUM other_state { @@ -56,8 +57,9 @@ other_conn_update(struct conntrack *ct, struct conn *conn_, ret = CT_UPDATE_VALID_NEW; } - conn_update_expiration(ct, &conn->up, other_timeouts[conn->state], now); - + conn_update_expiration_with_tp(ct, &conn->up, + other_timeouts[conn->state], + now); return ret; } @@ -76,7 +78,8 @@ other_new_conn(struct conntrack *ct, struct dp_packet *pkt OVS_UNUSED, conn = xzalloc(sizeof *conn); conn->state = OTHERS_FIRST; - conn_init_expiration(ct, &conn->up, other_timeouts[conn->state], now); + conn_init_expiration_with_tp(ct, &conn->up, other_timeouts[conn->state], + now); return &conn->up; } diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h index 9a8ca3910157..852482303684 100644 --- a/lib/conntrack-private.h +++ b/lib/conntrack-private.h @@ -118,6 +118,8 @@ struct conn { /* Immutable data. */ bool alg_related; /* True if alg data connection. */ enum ct_conn_type conn_type; + + uint32_t tpid; /* Timeout policy ID. */ }; enum ct_update_res { @@ -143,9 +145,9 @@ enum ct_update_res { CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \ CT_TIMEOUT(ICMP_REPLY, 30 * 1000) -/* The smallest of the above values: it is used as an upper bound for the - * interval between two rounds of cleanup of expired entries */ -#define CT_TM_MIN (30 * 1000) +/* This is used as an upper bound for the interval between two + * rounds of cleanup of expired entries */ +#define CT_TM_MIN (1 * 1000) #define CT_TIMEOUT(NAME, VAL) BUILD_ASSERT_DECL(VAL >= CT_TM_MIN); CT_TIMEOUTS @@ -163,6 +165,7 @@ struct conntrack { struct cmap conns OVS_GUARDED; struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED; struct hmap zone_limits OVS_GUARDED; + struct hmap timeout_policies OVS_GUARDED; uint32_t hash_basis; /* Salt for hashing a connection key. */ pthread_t clean_thread; /* Periodically cleans up connection tracker. */ struct latch clean_thread_exit; /* To destroy the 'clean_thread'. */ @@ -212,16 +215,22 @@ extern long long ct_timeout_val[]; /* ct_lock must be held. */ static inline void conn_init_expiration(struct conntrack *ct, struct conn *conn, - enum ct_timeout tm, long long now) + enum ct_timeout tm, long long now, + uint32_t tp_value, bool use_default) { - conn->expiration = now + ct_timeout_val[tm]; + if (use_default) { + conn->expiration = now + ct_timeout_val[tm]; + } else { + conn->expiration = now + tp_value * 1000; + } ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node); } /* The conn entry lock must be held on entry and exit. */ static inline void conn_update_expiration(struct conntrack *ct, struct conn *conn, - enum ct_timeout tm, long long now) + enum ct_timeout tm, long long now, + uint32_t tp_value, bool use_default) OVS_NO_THREAD_SAFETY_ANALYSIS { ovs_mutex_unlock(&conn->lock); @@ -229,7 +238,11 @@ conn_update_expiration(struct conntrack *ct, struct conn *conn, ovs_mutex_lock(&ct->ct_lock); ovs_mutex_lock(&conn->lock); if (!conn->cleaned) { - conn->expiration = now + ct_timeout_val[tm]; + if (use_default) { + conn->expiration = now + ct_timeout_val[tm]; + } else { + conn->expiration = now + tp_value * 1000; + } ovs_list_remove(&conn->exp_node); ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node); } diff --git a/lib/conntrack-tcp.c b/lib/conntrack-tcp.c index 47261c7551d1..194c5a1ba410 100644 --- a/lib/conntrack-tcp.c +++ b/lib/conntrack-tcp.c @@ -39,6 +39,7 @@ #include <config.h> #include "conntrack-private.h" +#include "conntrack-tp.h" #include "coverage.h" #include "ct-dpif.h" #include "dp-packet.h" @@ -188,7 +189,8 @@ tcp_conn_update(struct conntrack *ct, struct conn *conn_, return CT_UPDATE_NEW; } else if (src->state <= CT_DPIF_TCPS_SYN_SENT) { src->state = CT_DPIF_TCPS_SYN_SENT; - conn_update_expiration(ct, &conn->up, CT_TM_TCP_FIRST_PACKET, now); + conn_update_expiration_with_tp(ct, &conn->up, + CT_TM_TCP_FIRST_PACKET, now); return CT_UPDATE_VALID_NEW; } } @@ -339,18 +341,23 @@ tcp_conn_update(struct conntrack *ct, struct conn *conn_, if (src->state >= CT_DPIF_TCPS_FIN_WAIT_2 && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2) { - conn_update_expiration(ct, &conn->up, CT_TM_TCP_CLOSED, now); + conn_update_expiration_with_tp(ct, &conn->up, CT_TM_TCP_CLOSED, + now); } else if (src->state >= CT_DPIF_TCPS_CLOSING && dst->state >= CT_DPIF_TCPS_CLOSING) { - conn_update_expiration(ct, &conn->up, CT_TM_TCP_FIN_WAIT, now); + conn_update_expiration_with_tp(ct, &conn->up, CT_TM_TCP_FIN_WAIT, + now); } else if (src->state < CT_DPIF_TCPS_ESTABLISHED || dst->state < CT_DPIF_TCPS_ESTABLISHED) { - conn_update_expiration(ct, &conn->up, CT_TM_TCP_OPENING, now); + conn_update_expiration_with_tp(ct, &conn->up, CT_TM_TCP_OPENING, + now); } else if (src->state >= CT_DPIF_TCPS_CLOSING || dst->state >= CT_DPIF_TCPS_CLOSING) { - conn_update_expiration(ct, &conn->up, CT_TM_TCP_CLOSING, now); + conn_update_expiration_with_tp(ct, &conn->up, CT_TM_TCP_CLOSING, + now); } else { - conn_update_expiration(ct, &conn->up, CT_TM_TCP_ESTABLISHED, now); + conn_update_expiration_with_tp(ct, &conn->up, + CT_TM_TCP_ESTABLISHED, now); } } else if ((dst->state < CT_DPIF_TCPS_SYN_SENT || dst->state >= CT_DPIF_TCPS_FIN_WAIT_2 @@ -471,7 +478,8 @@ tcp_new_conn(struct conntrack *ct, struct dp_packet *pkt, long long now) src->state = CT_DPIF_TCPS_SYN_SENT; dst->state = CT_DPIF_TCPS_CLOSED; - conn_init_expiration(ct, &newconn->up, CT_TM_TCP_FIRST_PACKET, now); + conn_init_expiration_with_tp(ct, &newconn->up, CT_TM_TCP_FIRST_PACKET, + now); return &newconn->up; } diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c new file mode 100644 index 000000000000..87564a71671f --- /dev/null +++ b/lib/conntrack-tp.c @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2020 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <config.h> + +#include "conntrack-private.h" +#include "conntrack-tp.h" +#include "dp-packet.h" + +#include "openvswitch/vlog.h" +VLOG_DEFINE_THIS_MODULE(conntrack_tp); +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + +static const char *ct_timeout_str[] = { +#define CT_TIMEOUT(NAME, VALUE) #NAME + CT_TIMEOUTS +#undef CT_TIMEOUT +}; + +static inline bool +check_present_and_set(struct timeout_policy *tp, uint32_t *v, + enum ct_dpif_tp_attr attr) +{ + if (tp && (tp->p.present & (1 << attr))) { + *v = tp->p.attrs[attr]; + return true; + } + return false; +} + +#define TP_HAS_FUNC(ATTR) \ +static bool \ +tp_has_##ATTR(struct timeout_policy *tp, uint32_t *v) \ +{ \ + return check_present_and_set(tp, v, CT_DPIF_TP_ATTR_##ATTR); \ +} + +/* These functions check whether the timeout value is set from the + * present bit. If true, then set the value to '*v'. For meaning + * of each value, see CT_Timeout_Policy table at ovs-vswitchd.conf.db(5). + * + * Note that ICMP_REPLY means receiving an ICMP error reply, and + * currently it is not implemented. + */ +TP_HAS_FUNC(TCP_SYN_SENT) +TP_HAS_FUNC(TCP_SYN_RECV) +TP_HAS_FUNC(TCP_ESTABLISHED) +TP_HAS_FUNC(TCP_FIN_WAIT) +TP_HAS_FUNC(TCP_TIME_WAIT) +TP_HAS_FUNC(TCP_CLOSE) +TP_HAS_FUNC(UDP_FIRST) +TP_HAS_FUNC(UDP_SINGLE) +TP_HAS_FUNC(UDP_MULTIPLE) +TP_HAS_FUNC(ICMP_FIRST) +/* TP_HAS_FUNC(ICMP_REPLY) not implemented. */ + +static bool +conn_update_expiration_with_policy(struct conntrack *ct, struct conn *conn, + enum ct_timeout tm, long long now, + struct timeout_policy *tp) +{ + uint32_t val; + + switch (tm) { + case CT_TM_TCP_FIRST_PACKET: + if (tp_has_TCP_SYN_SENT(tp, &val)) { + goto update_with_val; + } + break; + case CT_TM_TCP_OPENING: + if (tp_has_TCP_SYN_RECV(tp, &val)) { + goto update_with_val; + } + break; + case CT_TM_TCP_ESTABLISHED: + if (tp_has_TCP_ESTABLISHED(tp, &val)) { + goto update_with_val; + } + break; + case CT_TM_TCP_CLOSING: + if (tp_has_TCP_FIN_WAIT(tp, &val)) { + goto update_with_val; + } + break; + case CT_TM_TCP_FIN_WAIT: + if (tp_has_TCP_TIME_WAIT(tp, &val)) { + goto update_with_val; + } + break; + case CT_TM_TCP_CLOSED: + if (tp_has_TCP_CLOSE(tp, &val)) { + goto update_with_val; + } + break; + case CT_TM_OTHER_FIRST: + if (tp_has_UDP_FIRST(tp, &val)) { + goto update_with_val; + } + break; + case CT_TM_OTHER_BIDIR: + if (tp_has_UDP_SINGLE(tp, &val)) { + goto update_with_val; + } + break; + case CT_TM_OTHER_MULTIPLE: + if (tp_has_UDP_MULTIPLE(tp, &val)) { + goto update_with_val; + } + break; + case CT_TM_ICMP_FIRST: + if (tp_has_ICMP_FIRST(tp, &val)) { + goto update_with_val; + } + break; + case CT_TM_ICMP_REPLY: + /* Not Implemented. */ + return true; + case N_CT_TM: + default: + OVS_NOT_REACHED(); + break; + } + return false; + +update_with_val: + conn_update_expiration(ct, conn, tm, now, val, false); + return true; +} + +static bool +conn_init_expiration_with_policy(struct conntrack *ct, struct conn *conn, + enum ct_timeout tm, long long now, + struct timeout_policy *tp) +{ + uint32_t val; + + switch (tm) { + case CT_TM_TCP_FIRST_PACKET: + if (tp_has_TCP_SYN_SENT(tp, &val)) { + goto init_with_val; + } + break; + case CT_TM_TCP_OPENING: + if (tp_has_TCP_SYN_RECV(tp, &val)) { + goto init_with_val; + } + break; + case CT_TM_TCP_ESTABLISHED: + if (tp_has_TCP_ESTABLISHED(tp, &val)) { + goto init_with_val; + } + break; + case CT_TM_TCP_CLOSING: + if (tp_has_TCP_FIN_WAIT(tp, &val)) { + goto init_with_val; + } + break; + case CT_TM_TCP_FIN_WAIT: + if (tp_has_TCP_TIME_WAIT(tp, &val)) { + goto init_with_val; + } + break; + case CT_TM_TCP_CLOSED: + if (tp_has_TCP_CLOSE(tp, &val)) { + goto init_with_val; + } + break; + case CT_TM_OTHER_FIRST: + if (tp_has_UDP_FIRST(tp, &val)) { + goto init_with_val; + } + break; + case CT_TM_OTHER_BIDIR: + if (tp_has_UDP_SINGLE(tp, &val)) { + goto init_with_val; + } + break; + case CT_TM_OTHER_MULTIPLE: + if (tp_has_UDP_MULTIPLE(tp, &val)) { + goto init_with_val; + } + break; + case CT_TM_ICMP_FIRST: + if (tp_has_ICMP_FIRST(tp, &val)) { + goto init_with_val; + } + break; + case CT_TM_ICMP_REPLY: + /* Not Implemented. */ + return true; + case N_CT_TM: + default: + OVS_NOT_REACHED(); + break; + } + return false; + +init_with_val: + conn_init_expiration(ct, conn, tm, now, val, false); + return true; +} + +void +conn_init_expiration_with_tp(struct conntrack *ct, struct conn *conn, + enum ct_timeout tm, long long now) +{ + struct timeout_policy *tp; + + tp = timeout_policy_lookup(ct, conn->tpid); + if (tp && conn_init_expiration_with_policy(ct, conn, tm, now, tp)) { + VLOG_DBG_RL(&rl, "Init timeout %s with policy.", + ct_timeout_str[tm]); + } else { + /* Init with default. */ + conn_init_expiration(ct, conn, tm, now, 0, true); + } +} + +void +conn_update_expiration_with_tp(struct conntrack *ct, struct conn *conn, + enum ct_timeout tm, long long now) +{ + struct timeout_policy *tp; + + tp = timeout_policy_lookup(ct, conn->tpid); + if (tp && conn_update_expiration_with_policy(ct, conn, tm, now, tp)) { + VLOG_DBG_RL(&rl, "Update timeout %s with policy.", + ct_timeout_str[tm]); + } else { + /* Update with default. */ + conn_update_expiration(ct, conn, tm, now, 0, true); + } +} diff --git a/lib/conntrack-tp.h b/lib/conntrack-tp.h new file mode 100644 index 000000000000..34048ac2aeea --- /dev/null +++ b/lib/conntrack-tp.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2020 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CONNTRACK_TP_H +#define CONNTRACK_TP_H 1 + +void conn_init_expiration_with_tp(struct conntrack *ct, struct conn *conn, + enum ct_timeout tm, long long now); +void conn_update_expiration_with_tp(struct conntrack *ct, struct conn *conn, + enum ct_timeout tm, long long now); +#endif diff --git a/lib/conntrack.c b/lib/conntrack.c index 0cbc8f6d2b25..de934bdf7169 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -143,6 +143,12 @@ detect_ftp_ctl_type(const struct conn_lookup_ctx *ctx, static void expectation_clean(struct conntrack *ct, const struct conn_key *master_key); +static void timeout_policy_init(struct conntrack *ct); +static int timeout_policy_create(struct conntrack *ct, + struct timeout_policy *tp); +static void timeout_policy_clean(struct conntrack *ct, + struct timeout_policy *tp); + static struct ct_l4_proto *l4_protos[] = { [IPPROTO_TCP] = &ct_proto_tcp, [IPPROTO_UDP] = &ct_proto_other, @@ -312,6 +318,7 @@ conntrack_init(void) } hmap_init(&ct->zone_limits); ct->zone_limit_seq = 0; + timeout_policy_init(ct); ovs_mutex_unlock(&ct->ct_lock); ct->hash_basis = random_uint32(); @@ -430,6 +437,139 @@ zone_limit_delete(struct conntrack *ct, uint16_t zone) return 0; } +struct timeout_policy * +timeout_policy_get(struct conntrack *ct, int32_t tpid) +{ + struct timeout_policy *tp; + + ovs_mutex_lock(&ct->ct_lock); + tp = timeout_policy_lookup(ct, tpid); + if (!tp) { + ovs_mutex_unlock(&ct->ct_lock); + return NULL; + } + + ovs_mutex_unlock(&ct->ct_lock); + return tp; +} + +struct timeout_policy * +timeout_policy_lookup(struct conntrack *ct, int32_t tpid) + OVS_REQUIRES(ct->ct_lock) +{ + struct timeout_policy *tp; + uint32_t hash; + + hash = zone_key_hash(tpid, ct->hash_basis); + HMAP_FOR_EACH_IN_BUCKET (tp, node, hash, &ct->timeout_policies) { + if (tp->p.id == tpid) { + return tp; + } + } + return NULL; +} + +static bool +is_valid_tpid(uint32_t tpid OVS_UNUSED) +{ + return true; +} + +static int +timeout_policy_create(struct conntrack *ct, + struct timeout_policy *new_tp) + OVS_REQUIRES(ct->ct_lock) +{ + uint32_t tpid = new_tp->p.id; + uint32_t hash; + + if (is_valid_tpid(tpid)) { + struct timeout_policy *tp; + + tp = xzalloc(sizeof *tp); + memcpy(&tp->p, &new_tp->p, sizeof tp->p); + + hash = zone_key_hash(tpid, ct->hash_basis); + hmap_insert(&ct->timeout_policies, &tp->node, hash); + + return 0; + } else { + return EINVAL; + } +} + +static void +update_existing_tp(struct timeout_policy *tp_dst, + struct timeout_policy *tp_src) +{ + struct ct_dpif_timeout_policy *dst, *src; + int i; + + dst = &tp_dst->p; + src = &tp_src->p; + + /* Set the value to dst if present bit in src is set. */ + for (i = 0; i < ARRAY_SIZE(dst->attrs); i++) { + if (src->present & (1 << i)) { + dst->attrs[i] = src->attrs[i]; + dst->present |= (1 << i); + } + } +} + +int +timeout_policy_update(struct conntrack *ct, struct timeout_policy *new_tp) +{ + int err = 0; + uint32_t tpid = new_tp->p.id; + + ovs_mutex_lock(&ct->ct_lock); + struct timeout_policy *tp = timeout_policy_lookup(ct, tpid); + if (tp) { + VLOG_INFO("Changed timeout policy of existing tpid %d", tpid); + update_existing_tp(tp, new_tp); + } else { + err = timeout_policy_create(ct, new_tp); + if (err) { + VLOG_WARN("Request to create timeout policy failed"); + } else { + VLOG_INFO("Created timeout policy tpid %d", tpid); + } + } + ovs_mutex_unlock(&ct->ct_lock); + return err; +} + +static void +timeout_policy_clean(struct conntrack *ct, struct timeout_policy *tp) + OVS_REQUIRES(ct->ct_lock) +{ + hmap_remove(&ct->timeout_policies, &tp->node); + free(tp); +} + +int +timeout_policy_delete(struct conntrack *ct, uint32_t tpid) +{ + ovs_mutex_lock(&ct->ct_lock); + struct timeout_policy *tp = timeout_policy_lookup(ct, tpid); + if (tp) { + VLOG_INFO("Deleted timeout policy for id %d", tpid); + timeout_policy_clean(ct, tp); + } else { + VLOG_INFO("Attempted delete of non-existent timeout policy: zone %d", + tpid); + } + ovs_mutex_unlock(&ct->ct_lock); + return 0; +} + +void +timeout_policy_init(struct conntrack *ct) +{ + hmap_init(&ct->timeout_policies); +} + static void conn_clean_cmn(struct conntrack *ct, struct conn *conn) OVS_REQUIRES(ct->ct_lock) @@ -502,6 +642,12 @@ conntrack_destroy(struct conntrack *ct) } hmap_destroy(&ct->zone_limits); + struct timeout_policy *tp; + HMAP_FOR_EACH_POP (tp, node, &ct->timeout_policies) { + free(tp); + } + hmap_destroy(&ct->timeout_policies); + ovs_mutex_unlock(&ct->ct_lock); ovs_mutex_destroy(&ct->ct_lock); @@ -1275,7 +1421,8 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, bool force, bool commit, long long now, const uint32_t *setmark, const struct ovs_key_ct_labels *setlabel, const struct nat_action_info_t *nat_action_info, - ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper) + ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper, + uint32_t tpid) { /* Reset ct_state whenever entering a new zone. */ if (pkt->md.ct_state && pkt->md.ct_zone != zone) { @@ -1323,6 +1470,7 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, nat_action_info, ct_alg_ctl, now, &create_new_conn))) { + conn->tpid = tpid; create_new_conn = conn_update_state(ct, pkt, ctx, conn, now); } if (nat_action_info && !create_new_conn) { @@ -1395,7 +1543,7 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch, const struct ovs_key_ct_labels *setlabel, ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper, const struct nat_action_info_t *nat_action_info, - long long now) + long long now, uint32_t tpid) { ipf_preprocess_conntrack(ct->ipf, pkt_batch, now, dl_type, zone, ct->hash_basis); @@ -1417,7 +1565,8 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch, write_ct_md(packet, zone, NULL, NULL, NULL); } else { process_one(ct, packet, &ctx, zone, force, commit, now, setmark, - setlabel, nat_action_info, tp_src, tp_dst, helper); + setlabel, nat_action_info, tp_src, tp_dst, helper, + tpid); } } @@ -1540,14 +1689,14 @@ conntrack_clean(struct conntrack *ct, long long now) * - We want to reduce the number of wakeups and batch connection cleanup * when the load is not very high. CT_CLEAN_INTERVAL ensures that if we * are coping with the current cleanup tasks, then we wait at least - * 5 seconds to do further cleanup. + * 1 seconds to do further cleanup. * * - We don't want to keep the map locked too long, as we might prevent * traffic from flowing. CT_CLEAN_MIN_INTERVAL ensures that if cleanup is * behind, there is at least some 200ms blocks of time when the map will be * left alone, so the datapath can operate unhindered. */ -#define CT_CLEAN_INTERVAL 5000 /* 5 seconds */ +#define CT_CLEAN_INTERVAL 1000 /* 1 second */ #define CT_CLEAN_MIN_INTERVAL 200 /* 0.2 seconds */ static void * diff --git a/lib/conntrack.h b/lib/conntrack.h index b0d0fc8d9597..a55609bb2907 100644 --- a/lib/conntrack.h +++ b/lib/conntrack.h @@ -20,6 +20,7 @@ #include <stdbool.h> #include "cmap.h" +#include "ct-dpif.h" #include "latch.h" #include "odp-netlink.h" #include "openvswitch/hmap.h" @@ -93,7 +94,7 @@ int conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch, const struct ovs_key_ct_labels *setlabel, ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper, const struct nat_action_info_t *nat_action_info, - long long now); + long long now, uint32_t tpid); void conntrack_clear(struct dp_packet *packet); struct conntrack_dump { @@ -111,6 +112,11 @@ struct conntrack_zone_limit { uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */ }; +struct timeout_policy { + struct hmap_node node; + struct ct_dpif_timeout_policy p; +}; + enum { INVALID_ZONE = -2, DEFAULT_ZONE = -1, /* Default zone for zone limit management. */ @@ -139,5 +145,11 @@ struct conntrack_zone_limit zone_limit_get(struct conntrack *ct, int32_t zone); int zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit); int zone_limit_delete(struct conntrack *ct, uint16_t zone); +int timeout_policy_update(struct conntrack *ct, struct timeout_policy *tp); +int timeout_policy_delete(struct conntrack *ct, uint32_t tpid); +struct timeout_policy *timeout_policy_get(struct conntrack *ct, int32_t tpid); +struct timeout_policy *timeout_policy_lookup(struct conntrack *ct, + int32_t tpid); + #endif /* conntrack.h */ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index e456cc9befbe..41e805a832c8 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -7337,6 +7337,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, bool commit = false; unsigned int left; uint16_t zone = 0; + uint32_t tpid = 0; const char *helper = NULL; const uint32_t *setmark = NULL; const struct ovs_key_ct_labels *setlabel = NULL; @@ -7372,8 +7373,9 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, * netlink events. */ break; case OVS_CT_ATTR_TIMEOUT: - /* Userspace datapath does not support customized timeout - * policy yet. */ + if (!str_to_uint(nl_attr_get_string(b), 10, &tpid)) { + VLOG_WARN("Invalid tpid %s.", nl_attr_get_string(b)); + } break; case OVS_CT_ATTR_NAT: { const struct nlattr *b_nest; @@ -7459,7 +7461,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, conntrack_execute(dp->conntrack, packets_, aux->flow->dl_type, force, commit, zone, setmark, setlabel, aux->flow->tp_src, aux->flow->tp_dst, helper, nat_action_info_ref, - pmd->ctx.now / 1000); + pmd->ctx.now / 1000, tpid); break; } @@ -7693,6 +7695,64 @@ dpif_netdev_ct_del_limits(struct dpif *dpif OVS_UNUSED, } static int +dpif_netdev_ct_set_timeout_policy(struct dpif *dpif, + const struct ct_dpif_timeout_policy *dpif_tp) +{ + struct timeout_policy tp; + struct dp_netdev *dp; + int err = 0; + + dp = get_dp_netdev(dpif); + memcpy(&tp.p, dpif_tp, sizeof tp.p); + err = timeout_policy_update(dp->conntrack, &tp); + return err; +} + +static int +dpif_netdev_ct_get_timeout_policy(struct dpif *dpif, uint32_t tp_id, + struct ct_dpif_timeout_policy *dpif_tp) +{ + struct timeout_policy *tp; + struct dp_netdev *dp; + int err = 0; + + dp = get_dp_netdev(dpif); + tp = timeout_policy_get(dp->conntrack, tp_id); + if (!tp) { + return EINVAL; + } + memcpy(dpif_tp, &tp->p, sizeof tp->p); + return err; +} + +static int +dpif_netdev_ct_del_timeout_policy(struct dpif *dpif, + uint32_t tp_id) +{ + struct dp_netdev *dp; + int err = 0; + + dp = get_dp_netdev(dpif); + err = timeout_policy_delete(dp->conntrack, tp_id); + return err; +} + +static int +dpif_netdev_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED, + uint32_t tp_id, + uint16_t dl_type OVS_UNUSED, + uint8_t nw_proto OVS_UNUSED, + char **tp_name, bool *is_generic) +{ + struct ds ds = DS_EMPTY_INITIALIZER; + + ds_put_format(&ds, "%"PRIu32, tp_id); + *tp_name = ds_steal_cstr(&ds); + *is_generic = true; + return 0; +} + +static int dpif_netdev_ipf_set_enabled(struct dpif *dpif, bool v6, bool enable) { struct dp_netdev *dp = get_dp_netdev(dpif); @@ -7802,13 +7862,13 @@ const struct dpif_class dpif_netdev_class = { dpif_netdev_ct_set_limits, dpif_netdev_ct_get_limits, dpif_netdev_ct_del_limits, - NULL, /* ct_set_timeout_policy */ - NULL, /* ct_get_timeout_policy */ - NULL, /* ct_del_timeout_policy */ + dpif_netdev_ct_set_timeout_policy, + dpif_netdev_ct_get_timeout_policy, + dpif_netdev_ct_del_timeout_policy, NULL, /* ct_timeout_policy_dump_start */ NULL, /* ct_timeout_policy_dump_next */ NULL, /* ct_timeout_policy_dump_done */ - NULL, /* ct_get_timeout_policy_name */ + dpif_netdev_ct_get_timeout_policy_name, dpif_netdev_ipf_set_enabled, dpif_netdev_ipf_set_min_frag, dpif_netdev_ipf_set_max_nfrags, diff --git a/tests/system-traffic.at b/tests/system-traffic.at index 4a39c929c207..848427fa7314 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -3301,8 +3301,9 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src= AT_CHECK([ovs-appctl dpctl/flush-conntrack]) dnl Shorten the udp_single and icmp_first timeout in zone 5 +dnl Userspace datapath uses udp_first, and kernel datapath uses udp_single VSCTL_ADD_DATAPATH_TABLE() -AT_CHECK([ovs-vsctl add-zone-tp $DP_TYPE zone=5 udp_single=3 icmp_first=3]) +AT_CHECK([ovs-vsctl add-zone-tp $DP_TYPE zone=5 udp_first=3 udp_single=3 icmp_first=3]) dnl Send ICMP and UDP traffic NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at index ba7f4102f494..72c84b9c7c82 100644 --- a/tests/system-userspace-macros.at +++ b/tests/system-userspace-macros.at @@ -99,12 +99,8 @@ m4_define([CHECK_CONNTRACK_NAT]) # CHECK_CONNTRACK_TIMEOUT() # # Perform requirements checks for running conntrack customized timeout tests. -* The userspace datapath does not support this feature yet. # -m4_define([CHECK_CONNTRACK_TIMEOUT], -[ - AT_SKIP_IF([:]) -]) +m4_define([CHECK_CONNTRACK_TIMEOUT]) # CHECK_CT_DPIF_SET_GET_MAXCONNS() # diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c index f77ee75e38df..e7c73220aef5 100644 --- a/tests/test-conntrack.c +++ b/tests/test-conntrack.c @@ -90,7 +90,7 @@ ct_thread_main(void *aux_) ovs_barrier_block(&barrier); for (i = 0; i < n_pkts; i += batch_size) { conntrack_execute(ct, pkt_batch, dl_type, false, true, 0, NULL, NULL, - 0, 0, NULL, NULL, now); + 0, 0, NULL, NULL, now, 0); } ovs_barrier_block(&barrier); destroy_packets(pkt_batch); @@ -174,7 +174,7 @@ pcap_batch_execute_conntrack(struct conntrack *ct_, if (flow.dl_type != dl_type) { conntrack_execute(ct_, &new_batch, dl_type, false, true, 0, - NULL, NULL, 0, 0, NULL, NULL, now); + NULL, NULL, 0, 0, NULL, NULL, now, 0); dp_packet_batch_init(&new_batch); } dp_packet_batch_add(&new_batch, packet); @@ -182,7 +182,7 @@ pcap_batch_execute_conntrack(struct conntrack *ct_, if (!dp_packet_batch_is_empty(&new_batch)) { conntrack_execute(ct_, &new_batch, dl_type, false, true, 0, NULL, NULL, - 0, 0, NULL, NULL, now); + 0, 0, NULL, NULL, now, 0); } } -- 2.7.4 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev