Since commit d265929930e2 ("netfilter: nf_conncount: reduce unnecessary
GC") if nftables/iptables connlimit is used without a check for new
connections there can be duplicated entries tracked.

Pass the nf_conn struct directly to the nf_conncount API and check
whether the connection is confirmed or not inside nf_conncount_add(). If
the connection is confirmed, skip it.

Fixes: d265929930e2 ("netfilter: nf_conncount: reduce unnecessary GC")
Signed-off-by: Fernando Fernandez Mancera <[email protected]>
---
 include/net/netfilter/nf_conntrack_count.h | 10 +---
 net/netfilter/nf_conncount.c               | 70 ++++++++++++----------
 net/netfilter/nft_connlimit.c              | 35 ++++++-----
 net/netfilter/xt_connlimit.c               | 28 +++++----
 net/openvswitch/conntrack.c                | 14 ++---
 5 files changed, 81 insertions(+), 76 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_count.h 
b/include/net/netfilter/nf_conntrack_count.h
index 1b58b5b91ff6..4fc2fb03d093 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -18,15 +18,11 @@ struct nf_conncount_list {
 struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int 
keylen);
 void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data);
 
-unsigned int nf_conncount_count(struct net *net,
+unsigned int nf_conncount_count(struct net *net, const struct nf_conn *ct,
                                struct nf_conncount_data *data,
-                               const u32 *key,
-                               const struct nf_conntrack_tuple *tuple,
-                               const struct nf_conntrack_zone *zone);
+                               const u32 *key);
 
-int nf_conncount_add(struct net *net, struct nf_conncount_list *list,
-                    const struct nf_conntrack_tuple *tuple,
-                    const struct nf_conntrack_zone *zone);
+int nf_conncount_add(const struct nf_conn *ct, struct nf_conncount_list *list);
 
 void nf_conncount_list_init(struct nf_conncount_list *list);
 
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 913ede2f57f9..803589f4eaa1 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -122,16 +122,22 @@ find_or_evict(struct net *net, struct nf_conncount_list 
*list,
        return ERR_PTR(-EAGAIN);
 }
 
-static int __nf_conncount_add(struct net *net,
-                             struct nf_conncount_list *list,
-                             const struct nf_conntrack_tuple *tuple,
-                             const struct nf_conntrack_zone *zone)
+static int __nf_conncount_add(const struct nf_conn *ct,
+                             struct nf_conncount_list *list)
 {
        const struct nf_conntrack_tuple_hash *found;
        struct nf_conncount_tuple *conn, *conn_n;
+       const struct nf_conntrack_tuple *tuple;
+       const struct nf_conntrack_zone *zone;
        struct nf_conn *found_ct;
        unsigned int collect = 0;
 
+       if (!ct || nf_ct_is_confirmed(ct))
+               return -EINVAL;
+
+       tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+       zone = nf_ct_zone(ct);
+
        if ((u32)jiffies == list->last_gc)
                goto add_new_node;
 
@@ -140,7 +146,7 @@ static int __nf_conncount_add(struct net *net,
                if (collect > CONNCOUNT_GC_MAX_NODES)
                        break;
 
-               found = find_or_evict(net, list, conn);
+               found = find_or_evict(nf_ct_net(ct), list, conn);
                if (IS_ERR(found)) {
                        /* Not found, but might be about to be confirmed */
                        if (PTR_ERR(found) == -EAGAIN) {
@@ -198,16 +204,13 @@ static int __nf_conncount_add(struct net *net,
        return 0;
 }
 
-int nf_conncount_add(struct net *net,
-                    struct nf_conncount_list *list,
-                    const struct nf_conntrack_tuple *tuple,
-                    const struct nf_conntrack_zone *zone)
+int nf_conncount_add(const struct nf_conn *ct, struct nf_conncount_list *list)
 {
        int ret;
 
        /* check the saved connections */
        spin_lock_bh(&list->list_lock);
-       ret = __nf_conncount_add(net, list, tuple, zone);
+       ret = __nf_conncount_add(ct, list);
        spin_unlock_bh(&list->list_lock);
 
        return ret;
@@ -308,21 +311,22 @@ static void schedule_gc_worker(struct nf_conncount_data 
*data, int tree)
 }
 
 static unsigned int
-insert_tree(struct net *net,
+insert_tree(const struct nf_conn *ct,
            struct nf_conncount_data *data,
            struct rb_root *root,
            unsigned int hash,
-           const u32 *key,
-           const struct nf_conntrack_tuple *tuple,
-           const struct nf_conntrack_zone *zone)
+           const u32 *key)
 {
        struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
+       const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
+       const struct nf_conntrack_tuple *tuple;
        struct rb_node **rbnode, *parent;
        struct nf_conncount_rb *rbconn;
        struct nf_conncount_tuple *conn;
        unsigned int count = 0, gc_count = 0;
        bool do_gc = true;
 
+       tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
        spin_lock_bh(&nf_conncount_locks[hash]);
 restart:
        parent = NULL;
@@ -340,8 +344,8 @@ insert_tree(struct net *net,
                } else {
                        int ret;
 
-                       ret = nf_conncount_add(net, &rbconn->list, tuple, zone);
-                       if (ret)
+                       ret = nf_conncount_add(ct, &rbconn->list);
+                       if (ret && ret != -EINVAL)
                                count = 0; /* hotdrop */
                        else
                                count = rbconn->list.count;
@@ -352,7 +356,7 @@ insert_tree(struct net *net,
                if (gc_count >= ARRAY_SIZE(gc_nodes))
                        continue;
 
-               if (do_gc && nf_conncount_gc_list(net, &rbconn->list))
+               if (do_gc && nf_conncount_gc_list(nf_ct_net(ct), &rbconn->list))
                        gc_nodes[gc_count++] = rbconn;
        }
 
@@ -394,11 +398,9 @@ insert_tree(struct net *net,
 }
 
 static unsigned int
-count_tree(struct net *net,
+count_tree(struct net *net, const struct nf_conn *ct,
           struct nf_conncount_data *data,
-          const u32 *key,
-          const struct nf_conntrack_tuple *tuple,
-          const struct nf_conntrack_zone *zone)
+          const u32 *key)
 {
        struct rb_root *root;
        struct rb_node *parent;
@@ -422,7 +424,7 @@ count_tree(struct net *net,
                } else {
                        int ret;
 
-                       if (!tuple) {
+                       if (!ct) {
                                nf_conncount_gc_list(net, &rbconn->list);
                                return rbconn->list.count;
                        }
@@ -437,19 +439,23 @@ count_tree(struct net *net,
                        }
 
                        /* same source network -> be counted! */
-                       ret = __nf_conncount_add(net, &rbconn->list, tuple, 
zone);
+                       ret = __nf_conncount_add(ct, &rbconn->list);
                        spin_unlock_bh(&rbconn->list.list_lock);
-                       if (ret)
+                       if (ret && ret != -EINVAL) {
                                return 0; /* hotdrop */
-                       else
+                       } else {
+                               /* -EINVAL means add was skipped, update the 
list */
+                               if (ret == -EINVAL)
+                                       nf_conncount_gc_list(net, 
&rbconn->list);
                                return rbconn->list.count;
+                       }
                }
        }
 
-       if (!tuple)
+       if (!ct)
                return 0;
 
-       return insert_tree(net, data, root, hash, key, tuple, zone);
+       return insert_tree(ct, data, root, hash, key);
 }
 
 static void tree_gc_worker(struct work_struct *work)
@@ -511,16 +517,14 @@ static void tree_gc_worker(struct work_struct *work)
 }
 
 /* Count and return number of conntrack entries in 'net' with particular 'key'.
- * If 'tuple' is not null, insert it into the accounting data structure.
+ * If 'ct' is not null, insert the tuple into the accounting data structure.
  * Call with RCU read lock.
  */
-unsigned int nf_conncount_count(struct net *net,
+unsigned int nf_conncount_count(struct net *net, const struct nf_conn *ct,
                                struct nf_conncount_data *data,
-                               const u32 *key,
-                               const struct nf_conntrack_tuple *tuple,
-                               const struct nf_conntrack_zone *zone)
+                               const u32 *key)
 {
-       return count_tree(net, data, key, tuple, zone);
+       return count_tree(net, ct, data, key);
 }
 EXPORT_SYMBOL_GPL(nf_conncount_count);
 
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index fc35a11cdca2..e815c0235b62 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -24,28 +24,35 @@ static inline void nft_connlimit_do_eval(struct 
nft_connlimit *priv,
                                         const struct nft_pktinfo *pkt,
                                         const struct nft_set_ext *ext)
 {
-       const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
-       const struct nf_conntrack_tuple *tuple_ptr;
+       struct nf_conntrack_tuple_hash *h;
        struct nf_conntrack_tuple tuple;
        enum ip_conntrack_info ctinfo;
        const struct nf_conn *ct;
        unsigned int count;
-
-       tuple_ptr = &tuple;
+       int err;
 
        ct = nf_ct_get(pkt->skb, &ctinfo);
-       if (ct != NULL) {
-               tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-               zone = nf_ct_zone(ct);
-       } else if (!nf_ct_get_tuplepr(pkt->skb, skb_network_offset(pkt->skb),
-                                     nft_pf(pkt), nft_net(pkt), &tuple)) {
-               regs->verdict.code = NF_DROP;
-               return;
+       if (!ct) {
+               if (!nf_ct_get_tuplepr(pkt->skb, skb_network_offset(pkt->skb),
+                                      nft_pf(pkt), nft_net(pkt), &tuple)) {
+                       regs->verdict.code = NF_DROP;
+                       return;
+               }
+
+               h = nf_conntrack_find_get(nft_net(pkt), &nf_ct_zone_dflt, 
&tuple);
+               if (!h) {
+                       regs->verdict.code = NF_DROP;
+                       return;
+               }
+               ct = nf_ct_tuplehash_to_ctrack(h);
        }
 
-       if (nf_conncount_add(nft_net(pkt), priv->list, tuple_ptr, zone)) {
-               regs->verdict.code = NF_DROP;
-               return;
+       err = nf_conncount_add(ct, priv->list);
+       if (err) {
+               if (err != -EINVAL) {
+                       regs->verdict.code = NF_DROP;
+                       return;
+               }
        }
 
        count = READ_ONCE(priv->list->count);
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 0189f8b6b0bd..a91265e9e707 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -29,23 +29,26 @@
 static bool
 connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
-       struct net *net = xt_net(par);
        const struct xt_connlimit_info *info = par->matchinfo;
+       struct nf_conntrack_tuple_hash *h;
        struct nf_conntrack_tuple tuple;
-       const struct nf_conntrack_tuple *tuple_ptr = &tuple;
-       const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
        enum ip_conntrack_info ctinfo;
+       struct net *net = xt_net(par);
        const struct nf_conn *ct;
        unsigned int connections;
        u32 key[5];
 
        ct = nf_ct_get(skb, &ctinfo);
-       if (ct != NULL) {
-               tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-               zone = nf_ct_zone(ct);
-       } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
-                                     xt_family(par), net, &tuple)) {
-               goto hotdrop;
+       if (!ct) {
+               if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+                                      xt_family(par), net, &tuple))
+                       goto hotdrop;
+
+               h = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
+               if (!h)
+                       goto hotdrop;
+
+               ct = nf_ct_tuplehash_to_ctrack(h);
        }
 
        if (xt_family(par) == NFPROTO_IPV6) {
@@ -59,18 +62,17 @@ connlimit_mt(const struct sk_buff *skb, struct 
xt_action_param *par)
                for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i)
                        addr.ip6[i] &= info->mask.ip6[i];
                memcpy(key, &addr, sizeof(addr.ip6));
-               key[4] = zone->id;
+               key[4] = nf_ct_zone(ct)->id;
        } else {
                const struct iphdr *iph = ip_hdr(skb);
 
                key[0] = (info->flags & XT_CONNLIMIT_DADDR) ?
                         (__force __u32)iph->daddr : (__force __u32)iph->saddr;
                key[0] &= (__force __u32)info->mask.ip;
-               key[1] = zone->id;
+               key[1] = nf_ct_zone(ct)->id;
        }
 
-       connections = nf_conncount_count(net, info->data, key, tuple_ptr,
-                                        zone);
+       connections = nf_conncount_count(net, ct, info->data, key);
        if (connections == 0)
                /* kmalloc failed, drop it entirely */
                goto hotdrop;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index e573e9221302..e23a0de48ff9 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -929,7 +929,7 @@ static u32 ct_limit_get(const struct ovs_ct_limit_info 
*info, u16 zone)
 
 static int ovs_ct_check_limit(struct net *net,
                              const struct ovs_conntrack_info *info,
-                             const struct nf_conntrack_tuple *tuple)
+                             const struct nf_conn *ct)
 {
        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
        const struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
@@ -942,8 +942,8 @@ static int ovs_ct_check_limit(struct net *net,
        if (per_zone_limit == OVS_CT_LIMIT_UNLIMITED)
                return 0;
 
-       connections = nf_conncount_count(net, ct_limit_info->data,
-                                        &conncount_key, tuple, &info->zone);
+       connections = nf_conncount_count(net, ct, ct_limit_info->data,
+                                        &conncount_key);
        if (connections > per_zone_limit)
                return -ENOMEM;
 
@@ -972,8 +972,7 @@ static int ovs_ct_commit(struct net *net, struct 
sw_flow_key *key,
 #if    IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
        if (static_branch_unlikely(&ovs_ct_limit_enabled)) {
                if (!nf_ct_is_confirmed(ct)) {
-                       err = ovs_ct_check_limit(net, info,
-                               &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+                       err = ovs_ct_check_limit(net, info, ct);
                        if (err) {
                                net_warn_ratelimited("openvswitch: zone: %u "
                                        "exceeds conntrack limit\n",
@@ -1762,16 +1761,13 @@ static int __ovs_ct_limit_get_zone_limit(struct net 
*net,
                                         u16 zone_id, u32 limit,
                                         struct sk_buff *reply)
 {
-       struct nf_conntrack_zone ct_zone;
        struct ovs_zone_limit zone_limit;
        u32 conncount_key = zone_id;
 
        zone_limit.zone_id = zone_id;
        zone_limit.limit = limit;
-       nf_ct_zone_init(&ct_zone, zone_id, NF_CT_DEFAULT_ZONE_DIR, 0);
 
-       zone_limit.count = nf_conncount_count(net, data, &conncount_key, NULL,
-                                             &ct_zone);
+       zone_limit.count = nf_conncount_count(net, NULL, data, &conncount_key);
        return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit);
 }
 
-- 
2.51.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to