Paolo Valerio <pvale...@redhat.com> writes: > From: hepeng <hepeng.0...@bytedance.com> > > The patch avoids the extra allocation for nat_conn. > Currently, when doing NAT, the userspace conntrack will use an extra > conn for the two directions in a flow. However, each conn has actually > the two keys for both orig and rev directions. This patch introduces a > key_node[CT_DIRS] member as per Aaron's suggestion in the conn which > consists of a key, direction, and a cmap_node for hash lookup so > addressing the feedback received by the original patch [0]. > > [0] > https://patchwork.ozlabs.org/project/openvswitch/patch/20201129033255.64647-2-hepeng.0...@bytedance.com/ > > Signed-off-by: Peng He <hepeng.0...@bytedance.com> > Co-authored-by: Paolo Valerio <pvale...@redhat.com> > Signed-off-by: Paolo Valerio <pvale...@redhat.com> > ---
Thanks Paolo and Peng! Acked-by: Aaron Conole <acon...@redhat.com> > v2: > - use enum value instead of bool (Aaron). > - s/conn_for_expectation/conn_for_exp/ in process_ftp_ctl_v6() > to avoid long line. > - removed CT_CONN_TYPE_* reference in two comments. > --- > lib/conntrack-private.h | 19 +-- > lib/conntrack-tp.c | 6 + > lib/conntrack.c | 350 > +++++++++++++++++++---------------------------- > 3 files changed, 155 insertions(+), 220 deletions(-) > > diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h > index bb326868e..3fd5fccd3 100644 > --- a/lib/conntrack-private.h > +++ b/lib/conntrack-private.h > @@ -49,6 +49,12 @@ struct ct_endpoint { > * hashing in ct_endpoint_hash_add(). */ > BUILD_ASSERT_DECL(sizeof(struct ct_endpoint) == sizeof(union ct_addr) + 4); > > +enum key_dir { > + CT_DIR_FWD = 0, > + CT_DIR_REV, > + CT_DIRS, > +}; > + > /* Changes to this structure need to be reflected in conn_key_hash() > * and conn_key_cmp(). */ > struct conn_key { > @@ -112,20 +118,18 @@ enum ct_timeout { > > #define N_EXP_LISTS 100 > > -enum OVS_PACKED_ENUM ct_conn_type { > - CT_CONN_TYPE_DEFAULT, > - CT_CONN_TYPE_UN_NAT, > +struct conn_key_node { > + enum key_dir dir; > + struct conn_key key; > + struct cmap_node cm_node; > }; > > struct conn { > /* Immutable data. */ > - struct conn_key key; > - struct conn_key rev_key; > + struct conn_key_node key_node[CT_DIRS]; > struct conn_key parent_key; /* Only used for orig_tuple support. */ > - struct cmap_node cm_node; > uint16_t nat_action; > char *alg; > - struct conn *nat_conn; /* The NAT 'conn' context, if there is one. */ > atomic_flag reclaimed; /* False during the lifetime of the connection, > * True as soon as a thread has started freeing > * its memory. */ > @@ -150,7 +154,6 @@ struct conn { > > /* Immutable data. */ > bool alg_related; /* True if alg data connection. */ > - enum ct_conn_type conn_type; > > uint32_t tp_id; /* Timeout policy ID. */ > }; > diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c > index 89cb2704a..2149fdc73 100644 > --- a/lib/conntrack-tp.c > +++ b/lib/conntrack-tp.c > @@ -253,7 +253,8 @@ conn_update_expiration(struct conntrack *ct, struct conn > *conn, > } > VLOG_DBG_RL(&rl, "Update timeout %s zone=%u with policy id=%d " > "val=%u sec.", > - ct_timeout_str[tm], conn->key.zone, conn->tp_id, val); > + ct_timeout_str[tm], conn->key_node[CT_DIR_FWD].key.zone, > + conn->tp_id, val); > > atomic_store_relaxed(&conn->expiration, now + val * 1000); > } > @@ -273,7 +274,8 @@ conn_init_expiration(struct conntrack *ct, struct conn > *conn, > } > > VLOG_DBG_RL(&rl, "Init timeout %s zone=%u with policy id=%d val=%u sec.", > - ct_timeout_str[tm], conn->key.zone, conn->tp_id, val); > + ct_timeout_str[tm], conn->key_node[CT_DIR_FWD].key.zone, > + conn->tp_id, val); > > conn->expiration = now + val * 1000; > } > diff --git a/lib/conntrack.c b/lib/conntrack.c > index 5f1176d33..f75f9a8f1 100644 > --- a/lib/conntrack.c > +++ b/lib/conntrack.c > @@ -113,8 +113,7 @@ static void set_label(struct dp_packet *, struct conn *, > static void *clean_thread_main(void *f_); > > static bool > -nat_get_unique_tuple(struct conntrack *ct, const struct conn *conn, > - struct conn *nat_conn, > +nat_get_unique_tuple(struct conntrack *ct, struct conn *conn, > const struct nat_action_info_t *nat_info); > > static uint8_t > @@ -208,7 +207,7 @@ static alg_helper alg_helpers[] = { > #define ALG_WC_SRC_PORT 0 > > /* If the total number of connections goes above this value, no new > connections > - * are accepted; this is for CT_CONN_TYPE_DEFAULT connections. */ > + * are accepted. */ > #define DEFAULT_N_CONN_LIMIT 3000000 > > /* Does a member by member comparison of two conn_keys; this > @@ -234,61 +233,6 @@ conn_key_cmp(const struct conn_key *key1, const struct > conn_key *key2) > return 1; > } > > -static void > -ct_print_conn_info(const struct conn *c, const char *log_msg, > - enum vlog_level vll, bool force, bool rl_on) > -{ > -#define CT_VLOG(RL_ON, LEVEL, ...) \ > - do { \ > - if (RL_ON) { \ > - static struct vlog_rate_limit rl_ = VLOG_RATE_LIMIT_INIT(5, 5); \ > - vlog_rate_limit(&this_module, LEVEL, &rl_, __VA_ARGS__); \ > - } else { \ > - vlog(&this_module, LEVEL, __VA_ARGS__); \ > - } \ > - } while (0) > - > - if (OVS_UNLIKELY(force || vlog_is_enabled(&this_module, vll))) { > - if (c->key.dl_type == htons(ETH_TYPE_IP)) { > - CT_VLOG(rl_on, vll, "%s: src ip "IP_FMT" dst ip "IP_FMT" rev src > " > - "ip "IP_FMT" rev dst ip "IP_FMT" src/dst ports " > - "%"PRIu16"/%"PRIu16" rev src/dst ports " > - "%"PRIu16"/%"PRIu16" zone/rev zone " > - "%"PRIu16"/%"PRIu16" nw_proto/rev nw_proto " > - "%"PRIu8"/%"PRIu8, log_msg, > - IP_ARGS(c->key.src.addr.ipv4), > - IP_ARGS(c->key.dst.addr.ipv4), > - IP_ARGS(c->rev_key.src.addr.ipv4), > - IP_ARGS(c->rev_key.dst.addr.ipv4), > - ntohs(c->key.src.port), ntohs(c->key.dst.port), > - ntohs(c->rev_key.src.port), ntohs(c->rev_key.dst.port), > - c->key.zone, c->rev_key.zone, c->key.nw_proto, > - c->rev_key.nw_proto); > - } else { > - char ip6_s[INET6_ADDRSTRLEN]; > - inet_ntop(AF_INET6, &c->key.src.addr.ipv6, ip6_s, sizeof ip6_s); > - char ip6_d[INET6_ADDRSTRLEN]; > - inet_ntop(AF_INET6, &c->key.dst.addr.ipv6, ip6_d, sizeof ip6_d); > - char ip6_rs[INET6_ADDRSTRLEN]; > - inet_ntop(AF_INET6, &c->rev_key.src.addr.ipv6, ip6_rs, > - sizeof ip6_rs); > - char ip6_rd[INET6_ADDRSTRLEN]; > - inet_ntop(AF_INET6, &c->rev_key.dst.addr.ipv6, ip6_rd, > - sizeof ip6_rd); > - > - CT_VLOG(rl_on, vll, "%s: src ip %s dst ip %s rev src ip %s" > - " rev dst ip %s src/dst ports %"PRIu16"/%"PRIu16 > - " rev src/dst ports %"PRIu16"/%"PRIu16" zone/rev zone " > - "%"PRIu16"/%"PRIu16" nw_proto/rev nw_proto " > - "%"PRIu8"/%"PRIu8, log_msg, ip6_s, ip6_d, ip6_rs, > - ip6_rd, ntohs(c->key.src.port), ntohs(c->key.dst.port), > - ntohs(c->rev_key.src.port), ntohs(c->rev_key.dst.port), > - c->key.zone, c->rev_key.zone, c->key.nw_proto, > - c->rev_key.nw_proto); > - } > - } > -} > - > /* Initializes the connection tracker 'ct'. The caller is responsible for > * calling 'conntrack_destroy()', when the instance is not needed anymore */ > struct conntrack * > @@ -477,28 +421,27 @@ conn_clean__(struct conntrack *ct, struct conn *conn) > uint32_t hash; > > if (conn->alg) { > - expectation_clean(ct, &conn->key); > + expectation_clean(ct, &conn->key_node[CT_DIR_FWD].key); > } > > - hash = conn_key_hash(&conn->key, ct->hash_basis); > - cmap_remove(&ct->conns, &conn->cm_node, hash); > + hash = conn_key_hash(&conn->key_node[CT_DIR_FWD].key, ct->hash_basis); > + cmap_remove(&ct->conns, &conn->key_node[CT_DIR_FWD].cm_node, hash); > > - if (conn->nat_conn) { > - hash = conn_key_hash(&conn->nat_conn->key, ct->hash_basis); > - cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash); > + if (conn->nat_action) { > + hash = conn_key_hash(&conn->key_node[CT_DIR_REV].key, > + ct->hash_basis); > + cmap_remove(&ct->conns, &conn->key_node[CT_DIR_REV].cm_node, hash); > } > > rculist_remove(&conn->node); > } > > -/* Must be called with 'conn' of 'conn_type' CT_CONN_TYPE_DEFAULT. Also > - * removes the associated nat 'conn' from the lookup datastructures. */ > +/* Also removes the associated nat 'conn' from the lookup > + datastructures. */ > static void > conn_clean(struct conntrack *ct, struct conn *conn) > OVS_EXCLUDED(conn->lock, ct->ct_lock) > { > - ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT); > - > if (atomic_flag_test_and_set(&conn->reclaimed)) { > return; > } > @@ -585,34 +528,34 @@ conn_key_lookup(struct conntrack *ct, const struct > conn_key *key, > uint32_t hash, long long now, struct conn **conn_out, > bool *reply) > { > - struct conn *conn; > + struct conn_key_node *keyn; > + struct conn *conn = NULL; > bool found = false; > > - CMAP_FOR_EACH_WITH_HASH (conn, cm_node, hash, &ct->conns) { > + CMAP_FOR_EACH_WITH_HASH (keyn, cm_node, hash, &ct->conns) { > + conn = CONTAINER_OF(keyn, struct conn, key_node[keyn->dir]); > if (conn_expired(conn, now)) { > continue; > } > - if (!conn_key_cmp(&conn->key, key)) { > - found = true; > - if (reply) { > - *reply = false; > - } > - break; > - } > - if (!conn_key_cmp(&conn->rev_key, key)) { > - found = true; > - if (reply) { > - *reply = true; > + > + for (int i = CT_DIR_FWD; i < CT_DIRS; i++) { > + if (!conn_key_cmp(&conn->key_node[i].key, key)) { > + found = true; > + if (reply) { > + *reply = i; > + } > + goto out_found; > } > - break; > } > } > > +out_found: > if (found && conn_out) { > *conn_out = conn; > } else if (conn_out) { > *conn_out = NULL; > } > + > return found; > } > > @@ -646,7 +589,7 @@ write_ct_md(struct dp_packet *pkt, uint16_t zone, const > struct conn *conn, > if (conn->alg_related) { > key = &conn->parent_key; > } else { > - key = &conn->key; > + key = &conn->key_node[CT_DIR_FWD].key; > } > } else if (alg_exp) { > pkt->md.ct_mark = alg_exp->parent_mark; > @@ -877,7 +820,8 @@ nat_inner_packet(struct dp_packet *pkt, struct conn_key > *key, > static void > nat_packet(struct dp_packet *pkt, struct conn *conn, bool reply, bool > related) > { > - struct conn_key *key = reply ? &conn->key : &conn->rev_key; > + enum key_dir dir = reply ? CT_DIR_FWD : CT_DIR_REV; > + struct conn_key *key = &conn->key_node[dir].key; > uint16_t nat_action = reply ? nat_action_reverse(conn->nat_action) > : conn->nat_action; > > @@ -911,7 +855,7 @@ conn_seq_skew_set(struct conntrack *ct, const struct conn > *conn_in, > { > struct conn *conn; > > - conn_lookup(ct, &conn_in->key, now, &conn, NULL); > + conn_lookup(ct, &conn_in->key_node[CT_DIR_FWD].key, now, &conn, NULL); > if (conn && seq_skew) { > conn->seq_skew = seq_skew; > conn->seq_skew_dir = seq_skew_dir; > @@ -947,7 +891,6 @@ conn_not_found(struct conntrack *ct, struct dp_packet > *pkt, > OVS_REQUIRES(ct->ct_lock) > { > struct conn *nc = NULL; > - struct conn *nat_conn = NULL; > > if (!valid_new(pkt, &ctx->key)) { > pkt->md.ct_state = CS_INVALID; > @@ -961,6 +904,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet > *pkt, > } > > if (commit) { > + struct conn_key_node *fwd_key_node, *rev_key_node; > struct zone_limit *zl = zone_limit_lookup_or_default(ct, > ctx->key.zone); > if (zl && atomic_count_get(&zl->czl.count) >= zl->czl.limit) { > @@ -975,9 +919,12 @@ conn_not_found(struct conntrack *ct, struct dp_packet > *pkt, > } > > nc = new_conn(ct, pkt, &ctx->key, now, tp_id); > - memcpy(&nc->key, &ctx->key, sizeof nc->key); > - memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key); > - conn_key_reverse(&nc->rev_key); > + fwd_key_node = &nc->key_node[CT_DIR_FWD]; > + rev_key_node = &nc->key_node[CT_DIR_REV]; > + memcpy(&fwd_key_node->key, &ctx->key, sizeof fwd_key_node->key); > + memcpy(&rev_key_node->key, &fwd_key_node->key, > + sizeof rev_key_node->key); > + conn_key_reverse(&rev_key_node->key); > > if (ct_verify_helper(helper, ct_alg_ctl)) { > nc->alg = nullable_xstrdup(helper); > @@ -992,46 +939,33 @@ conn_not_found(struct conntrack *ct, struct dp_packet > *pkt, > > if (nat_action_info) { > nc->nat_action = nat_action_info->nat_action; > - nat_conn = xzalloc(sizeof *nat_conn); > > if (alg_exp) { > if (alg_exp->nat_rpl_dst) { > - nc->rev_key.dst.addr = alg_exp->alg_nat_repl_addr; > + rev_key_node->key.dst.addr = alg_exp->alg_nat_repl_addr; > nc->nat_action = NAT_ACTION_SRC; > } else { > - nc->rev_key.src.addr = alg_exp->alg_nat_repl_addr; > + rev_key_node->key.src.addr = alg_exp->alg_nat_repl_addr; > nc->nat_action = NAT_ACTION_DST; > } > } else { > - memcpy(nat_conn, nc, sizeof *nat_conn); > - bool nat_res = nat_get_unique_tuple(ct, nc, nat_conn, > - nat_action_info); > - > + bool nat_res = nat_get_unique_tuple(ct, nc, nat_action_info); > if (!nat_res) { > goto nat_res_exhaustion; > } > - > - /* Update nc with nat adjustments made to nat_conn by > - * nat_get_unique_tuple(). */ > - memcpy(nc, nat_conn, sizeof *nc); > } > > nat_packet(pkt, nc, false, ctx->icmp_related); > - memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key); > - memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->rev_key); > - nat_conn->conn_type = CT_CONN_TYPE_UN_NAT; > - nat_conn->nat_action = 0; > - nat_conn->alg = NULL; > - nat_conn->nat_conn = NULL; > - uint32_t nat_hash = conn_key_hash(&nat_conn->key, > ct->hash_basis); > - cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash); > + uint32_t rev_hash = conn_key_hash(&rev_key_node->key, > + ct->hash_basis); > + cmap_insert(&ct->conns, &rev_key_node->cm_node, rev_hash); > } > > - nc->nat_conn = nat_conn; > ovs_mutex_init_adaptive(&nc->lock); > - nc->conn_type = CT_CONN_TYPE_DEFAULT; > atomic_flag_clear(&nc->reclaimed); > - cmap_insert(&ct->conns, &nc->cm_node, ctx->hash); > + fwd_key_node->dir = CT_DIR_FWD; > + rev_key_node->dir = CT_DIR_REV; > + cmap_insert(&ct->conns, &fwd_key_node->cm_node, ctx->hash); > conn_expire_push_front(ct, nc); > atomic_count_inc(&ct->n_conn); > ctx->conn = nc; /* For completeness. */ > @@ -1052,7 +986,6 @@ conn_not_found(struct conntrack *ct, struct dp_packet > *pkt, > * firewall rules or a separate firewall. Also using zone partitioning > * can limit DoS impact. */ > nat_res_exhaustion: > - free(nat_conn); > delete_conn__(nc); > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); > VLOG_WARN_RL(&rl, "Unable to NAT due to tuple space exhaustion - " > @@ -1065,7 +998,6 @@ conn_update_state(struct conntrack *ct, struct dp_packet > *pkt, > struct conn_lookup_ctx *ctx, struct conn *conn, > long long now) > { > - ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT); > bool create_new_conn = false; > > if (ctx->icmp_related) { > @@ -1092,7 +1024,8 @@ conn_update_state(struct conntrack *ct, struct > dp_packet *pkt, > pkt->md.ct_state = CS_INVALID; > break; > case CT_UPDATE_NEW: > - if (conn_lookup(ct, &conn->key, now, NULL, NULL)) { > + if (conn_lookup(ct, &conn->key_node[CT_DIR_FWD].key, > + now, NULL, NULL)) { > conn_force_expire(conn); > } > create_new_conn = true; > @@ -1268,8 +1201,10 @@ initial_conn_lookup(struct conntrack *ct, struct > conn_lookup_ctx *ctx, > > if (natted) { > if (OVS_LIKELY(ctx->conn)) { > + enum key_dir dir; > ctx->reply = !ctx->reply; > - ctx->key = ctx->reply ? ctx->conn->rev_key : ctx->conn->key; > + dir = ctx->reply ? CT_DIR_REV : CT_DIR_FWD; > + ctx->key = ctx->conn->key_node[dir].key; > ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis); > } else { > /* A lookup failure does not necessarily imply that an > @@ -1302,31 +1237,13 @@ process_one(struct conntrack *ct, struct dp_packet > *pkt, > > /* Delete found entry if in wrong direction. 'force' implies commit. */ > if (OVS_UNLIKELY(force && ctx->reply && conn)) { > - if (conn_lookup(ct, &conn->key, now, NULL, NULL)) { > + if (conn_lookup(ct, &conn->key_node[CT_DIR_FWD].key, > + now, NULL, NULL)) { > conn_force_expire(conn); > } > conn = NULL; > } > > - if (OVS_LIKELY(conn)) { > - if (conn->conn_type == CT_CONN_TYPE_UN_NAT) { > - > - ctx->reply = true; > - struct conn *rev_conn = conn; /* Save for debugging. */ > - uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis); > - conn_key_lookup(ct, &ctx->key, hash, now, &conn, &ctx->reply); > - > - if (!conn) { > - pkt->md.ct_state |= CS_INVALID; > - write_ct_md(pkt, zone, NULL, NULL, NULL); > - char *log_msg = xasprintf("Missing parent conn %p", > rev_conn); > - ct_print_conn_info(rev_conn, log_msg, VLL_INFO, true, true); > - free(log_msg); > - return; > - } > - } > - } > - > enum ct_alg_ctl_type ct_alg_ctl = get_alg_ctl_type(pkt, tp_src, tp_dst, > helper); > > @@ -1419,8 +1336,9 @@ conntrack_execute(struct conntrack *ct, struct > dp_packet_batch *pkt_batch, > struct conn *conn = packet->md.conn; > if (OVS_UNLIKELY(packet->md.ct_state == CS_INVALID)) { > write_ct_md(packet, zone, NULL, NULL, NULL); > - } else if (conn && conn->key.zone == zone && !force > - && !get_alg_ctl_type(packet, tp_src, tp_dst, helper)) { > + } else if (conn && > + conn->key_node[CT_DIR_FWD].key.zone == zone && !force && > + !get_alg_ctl_type(packet, tp_src, tp_dst, helper)) { > process_one_fast(zone, setmark, setlabel, nat_action_info, > conn, packet); > } else if (OVS_UNLIKELY(!conn_key_extract(ct, packet, dl_type, &ctx, > @@ -2269,7 +2187,7 @@ nat_ipv6_addr_increment(struct in6_addr *ipv6, uint32_t > increment) > } > > static uint32_t > -nat_range_hash(const struct conn *conn, uint32_t basis, > +nat_range_hash(const struct conn_key *key, uint32_t basis, > const struct nat_action_info_t *nat_info) > { > uint32_t hash = basis; > @@ -2279,11 +2197,11 @@ nat_range_hash(const struct conn *conn, uint32_t > basis, > hash = hash_add(hash, > ((uint32_t) nat_info->max_port << 16) > | nat_info->min_port); > - hash = ct_endpoint_hash_add(hash, &conn->key.src); > - hash = ct_endpoint_hash_add(hash, &conn->key.dst); > - hash = hash_add(hash, (OVS_FORCE uint32_t) conn->key.dl_type); > - hash = hash_add(hash, conn->key.nw_proto); > - hash = hash_add(hash, conn->key.zone); > + hash = ct_endpoint_hash_add(hash, &key->src); > + hash = ct_endpoint_hash_add(hash, &key->dst); > + hash = hash_add(hash, (OVS_FORCE uint32_t) key->dl_type); > + hash = hash_add(hash, key->nw_proto); > + hash = hash_add(hash, key->zone); > > /* The purpose of the second parameter is to distinguish hashes of data > of > * different length; our data always has the same length so there is no > @@ -2357,7 +2275,7 @@ get_addr_in_range(union ct_addr *min, union ct_addr > *max, > } > > static void > -find_addr(const struct conn *conn, union ct_addr *min, > +find_addr(const struct conn_key *key, union ct_addr *min, > union ct_addr *max, union ct_addr *curr, > uint32_t hash, bool ipv4, > const struct nat_action_info_t *nat_info) > @@ -2367,9 +2285,9 @@ find_addr(const struct conn *conn, union ct_addr *min, > /* All-zero case. */ > if (!memcmp(min, &zero_ip, sizeof *min)) { > if (nat_info->nat_action & NAT_ACTION_SRC) { > - *curr = conn->key.src.addr; > + *curr = key->src.addr; > } else if (nat_info->nat_action & NAT_ACTION_DST) { > - *curr = conn->key.dst.addr; > + *curr = key->dst.addr; > } > } else { > get_addr_in_range(min, max, curr, hash, ipv4); > @@ -2388,7 +2306,7 @@ store_addr_to_key(union ct_addr *addr, struct conn_key > *key, > } > > static bool > -nat_get_unique_l4(struct conntrack *ct, struct conn *nat_conn, > +nat_get_unique_l4(struct conntrack *ct, struct conn_key *rev_key, > ovs_be16 *port, uint16_t curr, uint16_t min, > uint16_t max) > { > @@ -2411,7 +2329,7 @@ another_round: > } > > *port = htons(curr); > - if (!conn_lookup(ct, &nat_conn->rev_key, > + if (!conn_lookup(ct, rev_key, > time_msec(), NULL, NULL)) { > return true; > } > @@ -2450,39 +2368,41 @@ another_round: > * > * If none can be found, return exhaustion to the caller. */ > static bool > -nat_get_unique_tuple(struct conntrack *ct, const struct conn *conn, > - struct conn *nat_conn, > +nat_get_unique_tuple(struct conntrack *ct, struct conn *conn, > const struct nat_action_info_t *nat_info) > { > - uint32_t hash = nat_range_hash(conn, ct->hash_basis, nat_info); > + struct conn_key *fwd_key = &conn->key_node[CT_DIR_FWD].key, > + *rev_key = &conn->key_node[CT_DIR_REV].key; > union ct_addr min_addr = {0}, max_addr = {0}, addr = {0}; > - bool pat_proto = conn->key.nw_proto == IPPROTO_TCP || > - conn->key.nw_proto == IPPROTO_UDP || > - conn->key.nw_proto == IPPROTO_SCTP; > + bool pat_proto = fwd_key->nw_proto == IPPROTO_TCP || > + fwd_key->nw_proto == IPPROTO_UDP || > + fwd_key->nw_proto == IPPROTO_SCTP; > uint16_t min_dport, max_dport, curr_dport; > uint16_t min_sport, max_sport, curr_sport; > + uint32_t hash; > > + hash = nat_range_hash(fwd_key, ct->hash_basis, nat_info); > min_addr = nat_info->min_addr; > max_addr = nat_info->max_addr; > > - find_addr(conn, &min_addr, &max_addr, &addr, hash, > - (conn->key.dl_type == htons(ETH_TYPE_IP)), nat_info); > + find_addr(fwd_key, &min_addr, &max_addr, &addr, hash, > + (fwd_key->dl_type == htons(ETH_TYPE_IP)), nat_info); > > - set_sport_range(nat_info, &conn->key, hash, &curr_sport, > - &min_sport, &max_sport); > - set_dport_range(nat_info, &conn->key, hash, &curr_dport, > - &min_dport, &max_dport); > + set_sport_range(nat_info, fwd_key, hash, &curr_sport, > + &min_sport, &max_sport); > + set_dport_range(nat_info, fwd_key, hash, &curr_dport, > + &min_dport, &max_dport); > > if (pat_proto) { > - nat_conn->rev_key.src.port = htons(curr_dport); > - nat_conn->rev_key.dst.port = htons(curr_sport); > + rev_key->src.port = htons(curr_dport); > + rev_key->dst.port = htons(curr_sport); > } > > - store_addr_to_key(&addr, &nat_conn->rev_key, > + store_addr_to_key(&addr, rev_key, > nat_info->nat_action); > > if (!pat_proto) { > - if (!conn_lookup(ct, &nat_conn->rev_key, > + if (!conn_lookup(ct, rev_key, > time_msec(), NULL, NULL)) { > return true; > } > @@ -2492,12 +2412,12 @@ nat_get_unique_tuple(struct conntrack *ct, const > struct conn *conn, > > bool found = false; > if (nat_info->nat_action & NAT_ACTION_DST_PORT) { > - found = nat_get_unique_l4(ct, nat_conn, &nat_conn->rev_key.src.port, > + found = nat_get_unique_l4(ct, rev_key, &rev_key->src.port, > curr_dport, min_dport, max_dport); > } > > if (!found) { > - found = nat_get_unique_l4(ct, nat_conn, &nat_conn->rev_key.dst.port, > + found = nat_get_unique_l4(ct, rev_key, &rev_key->dst.port, > curr_sport, min_sport, max_sport); > } > > @@ -2513,9 +2433,10 @@ conn_update(struct conntrack *ct, struct conn *conn, > struct dp_packet *pkt, > struct conn_lookup_ctx *ctx, long long now) > { > ovs_mutex_lock(&conn->lock); > + uint8_t nw_proto = conn->key_node[CT_DIR_FWD].key.nw_proto; > enum ct_update_res update_res = > - l4_protos[conn->key.nw_proto]->conn_update(ct, conn, pkt, ctx->reply, > - now); > + l4_protos[nw_proto]->conn_update(ct, conn, pkt, ctx->reply, > + now); > ovs_mutex_unlock(&conn->lock); > return update_res; > } > @@ -2543,10 +2464,7 @@ conn_expiration(const struct conn *conn) > static bool > conn_expired(struct conn *conn, long long now) > { > - if (conn->conn_type == CT_CONN_TYPE_DEFAULT) { > - return now >= conn_expiration(conn); > - } > - return false; > + return now >= conn_expiration(conn); > } > > static bool > @@ -2572,9 +2490,7 @@ delete_conn__(struct conn *conn) > static void > delete_conn(struct conn *conn) > { > - ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT); > ovs_mutex_destroy(&conn->lock); > - free(conn->nat_conn); > delete_conn__(conn); > } > > @@ -2667,15 +2583,18 @@ static void > conn_to_ct_dpif_entry(const struct conn *conn, struct ct_dpif_entry *entry, > long long now) > { > + const struct conn_key *key = &conn->key_node[CT_DIR_FWD].key, > + *rev_key = &conn->key_node[CT_DIR_REV].key; > + > memset(entry, 0, sizeof *entry); > - conn_key_to_tuple(&conn->key, &entry->tuple_orig); > - conn_key_to_tuple(&conn->rev_key, &entry->tuple_reply); > + conn_key_to_tuple(key, &entry->tuple_orig); > + conn_key_to_tuple(rev_key, &entry->tuple_reply); > > if (conn->alg_related) { > conn_key_to_tuple(&conn->parent_key, &entry->tuple_parent); > } > > - entry->zone = conn->key.zone; > + entry->zone = key->zone; > > ovs_mutex_lock(&conn->lock); > entry->mark = conn->mark; > @@ -2683,7 +2602,7 @@ conn_to_ct_dpif_entry(const struct conn *conn, struct > ct_dpif_entry *entry, > > long long expiration = conn_expiration(conn) - now; > > - struct ct_l4_proto *class = l4_protos[conn->key.nw_proto]; > + struct ct_l4_proto *class = l4_protos[key->nw_proto]; > if (class->conn_get_protoinfo) { > class->conn_get_protoinfo(conn, &entry->protoinfo); > } > @@ -2731,15 +2650,17 @@ conntrack_dump_next(struct conntrack_dump *dump, > struct ct_dpif_entry *entry) > if (!cm_node) { > break; > } > + struct conn_key_node *keyn; > struct conn *conn; > - INIT_CONTAINER(conn, cm_node, cm_node); > > + INIT_CONTAINER(keyn, cm_node, cm_node); > + conn = CONTAINER_OF(keyn, struct conn, key_node[keyn->dir]); > if (conn_expired(conn, now)) { > continue; > } > > - if ((!dump->filter_zone || conn->key.zone == dump->zone) && > - (conn->conn_type != CT_CONN_TYPE_UN_NAT)) { > + if ((!dump->filter_zone || keyn->key.zone == dump->zone) && > + (keyn->dir == CT_DIR_FWD)) { > conn_to_ct_dpif_entry(conn, entry, now); > return 0; > } > @@ -2823,14 +2744,15 @@ conntrack_exp_dump_done(struct conntrack_dump *dump > OVS_UNUSED) > int > conntrack_flush(struct conntrack *ct, const uint16_t *zone) > { > + struct conn_key_node *keyn; > struct conn *conn; > > - CMAP_FOR_EACH (conn, cm_node, &ct->conns) { > - if (conn->conn_type != CT_CONN_TYPE_DEFAULT) { > + CMAP_FOR_EACH (keyn, cm_node, &ct->conns) { > + if (keyn->dir != CT_DIR_FWD) { > continue; > } > - > - if (!zone || *zone == conn->key.zone) { > + conn = CONTAINER_OF(keyn, struct conn, key_node[keyn->dir]); > + if (!zone || *zone == keyn->key.zone) { > conn_clean(ct, conn); > } > } > @@ -2842,18 +2764,18 @@ int > conntrack_flush_tuple(struct conntrack *ct, const struct ct_dpif_tuple > *tuple, > uint16_t zone) > { > - int error = 0; > struct conn_key key; > struct conn *conn; > + int error = 0; > > memset(&key, 0, sizeof(key)); > tuple_to_conn_key(tuple, zone, &key); > conn_lookup(ct, &key, time_msec(), &conn, NULL); > > - if (conn && conn->conn_type == CT_CONN_TYPE_DEFAULT) { > + if (conn) { > conn_clean(ct, conn); > } else { > - VLOG_WARN("Must flush tuple using the original pre-NATed tuple"); > + VLOG_WARN("Tuple not found"); > error = ENOENT; > } > > @@ -2996,50 +2918,52 @@ expectation_create(struct conntrack *ct, ovs_be16 > dst_port, > const struct conn *parent_conn, bool reply, bool > src_ip_wc, > bool skip_nat) > { > + const struct conn_key *pconn_key = > &parent_conn->key_node[CT_DIR_FWD].key, > + *pconn_rev_key = &parent_conn->key_node[CT_DIR_REV].key; > union ct_addr src_addr; > union ct_addr dst_addr; > union ct_addr alg_nat_repl_addr; > struct alg_exp_node *alg_exp_node = xzalloc(sizeof *alg_exp_node); > > if (reply) { > - src_addr = parent_conn->key.src.addr; > - dst_addr = parent_conn->key.dst.addr; > + src_addr = pconn_key->src.addr; > + dst_addr = pconn_key->dst.addr; > alg_exp_node->nat_rpl_dst = true; > if (skip_nat) { > alg_nat_repl_addr = dst_addr; > } else if (parent_conn->nat_action & NAT_ACTION_DST) { > - alg_nat_repl_addr = parent_conn->rev_key.src.addr; > + alg_nat_repl_addr = pconn_rev_key->src.addr; > alg_exp_node->nat_rpl_dst = false; > } else { > - alg_nat_repl_addr = parent_conn->rev_key.dst.addr; > + alg_nat_repl_addr = pconn_rev_key->dst.addr; > } > } else { > - src_addr = parent_conn->rev_key.src.addr; > - dst_addr = parent_conn->rev_key.dst.addr; > + src_addr = pconn_rev_key->src.addr; > + dst_addr = pconn_rev_key->dst.addr; > alg_exp_node->nat_rpl_dst = false; > if (skip_nat) { > alg_nat_repl_addr = src_addr; > } else if (parent_conn->nat_action & NAT_ACTION_DST) { > - alg_nat_repl_addr = parent_conn->key.dst.addr; > + alg_nat_repl_addr = pconn_key->dst.addr; > alg_exp_node->nat_rpl_dst = true; > } else { > - alg_nat_repl_addr = parent_conn->key.src.addr; > + alg_nat_repl_addr = pconn_key->src.addr; > } > } > if (src_ip_wc) { > memset(&src_addr, 0, sizeof src_addr); > } > > - alg_exp_node->key.dl_type = parent_conn->key.dl_type; > - alg_exp_node->key.nw_proto = parent_conn->key.nw_proto; > - alg_exp_node->key.zone = parent_conn->key.zone; > + alg_exp_node->key.dl_type = pconn_key->dl_type; > + alg_exp_node->key.nw_proto = pconn_key->nw_proto; > + alg_exp_node->key.zone = pconn_key->zone; > alg_exp_node->key.src.addr = src_addr; > alg_exp_node->key.dst.addr = dst_addr; > alg_exp_node->key.src.port = ALG_WC_SRC_PORT; > alg_exp_node->key.dst.port = dst_port; > alg_exp_node->parent_mark = parent_conn->mark; > alg_exp_node->parent_label = parent_conn->label; > - memcpy(&alg_exp_node->parent_key, &parent_conn->key, > + memcpy(&alg_exp_node->parent_key, pconn_key, > sizeof alg_exp_node->parent_key); > /* Take the write lock here because it is almost 100% > * likely that the lookup will fail and > @@ -3291,12 +3215,16 @@ process_ftp_ctl_v4(struct conntrack *ct, > > switch (mode) { > case CT_FTP_MODE_ACTIVE: > - *v4_addr_rep = conn_for_expectation->rev_key.dst.addr.ipv4; > - conn_ipv4_addr = conn_for_expectation->key.src.addr.ipv4; > + *v4_addr_rep = > + conn_for_expectation->key_node[CT_DIR_REV].key.dst.addr.ipv4; > + conn_ipv4_addr = > + conn_for_expectation->key_node[CT_DIR_FWD].key.src.addr.ipv4; > break; > case CT_FTP_MODE_PASSIVE: > - *v4_addr_rep = conn_for_expectation->key.dst.addr.ipv4; > - conn_ipv4_addr = conn_for_expectation->rev_key.src.addr.ipv4; > + *v4_addr_rep = > + conn_for_expectation->key_node[CT_DIR_FWD].key.dst.addr.ipv4; > + conn_ipv4_addr = > + conn_for_expectation->key_node[CT_DIR_REV].key.src.addr.ipv4; > break; > case CT_TFTP_MODE: > default: > @@ -3328,7 +3256,7 @@ skip_ipv6_digits(char *str) > static enum ftp_ctl_pkt > process_ftp_ctl_v6(struct conntrack *ct, > struct dp_packet *pkt, > - const struct conn *conn_for_expectation, > + const struct conn *conn_for_exp, > union ct_addr *v6_addr_rep, char **ftp_data_start, > size_t *addr_offset_from_ftp_data_start, > size_t *addr_size, enum ct_alg_mode *mode) > @@ -3396,24 +3324,25 @@ process_ftp_ctl_v6(struct conntrack *ct, > > switch (*mode) { > case CT_FTP_MODE_ACTIVE: > - *v6_addr_rep = conn_for_expectation->rev_key.dst.addr; > + *v6_addr_rep = conn_for_exp->key_node[CT_DIR_REV].key.dst.addr; > /* Although most servers will block this exploit, there may be some > * less well managed. */ > if (memcmp(&ip6_addr, &v6_addr_rep->ipv6, sizeof ip6_addr) && > - memcmp(&ip6_addr, &conn_for_expectation->key.src.addr.ipv6, > + memcmp(&ip6_addr, > + &conn_for_exp->key_node[CT_DIR_FWD].key.src.addr.ipv6, > sizeof ip6_addr)) { > return CT_FTP_CTL_INVALID; > } > break; > case CT_FTP_MODE_PASSIVE: > - *v6_addr_rep = conn_for_expectation->key.dst.addr; > + *v6_addr_rep = conn_for_exp->key_node[CT_DIR_FWD].key.dst.addr; > break; > case CT_TFTP_MODE: > default: > OVS_NOT_REACHED(); > } > > - expectation_create(ct, port, conn_for_expectation, > + expectation_create(ct, port, conn_for_exp, > !!(pkt->md.ct_state & CS_REPLY_DIR), false, false); > return CT_FTP_CTL_INTEREST; > } > @@ -3571,7 +3500,8 @@ handle_tftp_ctl(struct conntrack *ct, > long long now OVS_UNUSED, enum ftp_ctl_pkt ftp_ctl > OVS_UNUSED, > bool nat OVS_UNUSED) > { > - expectation_create(ct, conn_for_expectation->key.src.port, > + expectation_create(ct, > + > conn_for_expectation->key_node[CT_DIR_FWD].key.src.port, > conn_for_expectation, > !!(pkt->md.ct_state & CS_REPLY_DIR), false, false); > } > > > _______________________________________________ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev