When trying to delete user-defined chains in a large ruleset,
iptables-nft aborts with "No buffer space available". This can be
reproduced using the following script:

| #! /bin/bash
| iptables-nft-restore <(
|
| echo "*filter"
| for i in $(seq 0 200000);do
|         printf ":chain_%06x - [0:0]\n" $i
| done
| for i in $(seq 0 200000);do
|         printf -- "-A INPUT -j chain_%06x\n" $i
|         printf -- "-A INPUT -j chain_%06x\n" $i
| done
| echo COMMIT
|
| )
| iptables-nft -X

The problem seems to be the sheer amount of netlink error messages sent
back to user space (one EBUSY for each chain). To solve this, set
receive buffer size depending on number of commands sent to kernel.

Suggested-by: Pablo Neira Ayuso <pa...@netfilter.org>
Signed-off-by: Phil Sutter <p...@nwl.cc>
---
Changes since v1:
- Reset stored receive buffer size to zero in nft_restart().
---
 iptables/nft.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/iptables/nft.c b/iptables/nft.c
index 2c61521455de8..3aa2c6c6b9166 100644
--- a/iptables/nft.c
+++ b/iptables/nft.c
@@ -206,8 +206,24 @@ static void mnl_set_sndbuffer(const struct mnl_socket *nl,
        nlbuffsiz = newbuffsiz;
 }
 
+static int nlrcvbuffsiz;
+
+static void mnl_set_rcvbuffer(const struct mnl_socket *nl, int numcmds)
+{
+       int newbuffsiz = getpagesize() * numcmds;
+
+       if (newbuffsiz <= nlrcvbuffsiz)
+               return;
+
+       if (setsockopt(mnl_socket_get_fd(nl), SOL_SOCKET, SO_RCVBUFFORCE,
+                      &newbuffsiz, sizeof(socklen_t)) < 0)
+               return;
+
+       nlrcvbuffsiz = newbuffsiz;
+}
+
 static ssize_t mnl_nft_socket_sendmsg(const struct mnl_socket *nf_sock,
-                                     struct nftnl_batch *batch)
+                                     struct nftnl_batch *batch, int numcmds)
 {
        static const struct sockaddr_nl snl = {
                .nl_family = AF_NETLINK
@@ -222,13 +238,15 @@ static ssize_t mnl_nft_socket_sendmsg(const struct 
mnl_socket *nf_sock,
        };
 
        mnl_set_sndbuffer(nf_sock, batch);
+       mnl_set_rcvbuffer(nf_sock, numcmds);
        nftnl_batch_iovec(batch, iov, iov_len);
 
        return sendmsg(mnl_socket_get_fd(nf_sock), &msg, 0);
 }
 
 static int mnl_batch_talk(const struct mnl_socket *nf_sock,
-                         struct nftnl_batch *batch, struct list_head *err_list)
+                         struct nftnl_batch *batch, int numcmds,
+                         struct list_head *err_list)
 {
        const struct mnl_socket *nl = nf_sock;
        int ret, fd = mnl_socket_get_fd(nl), portid = mnl_socket_get_portid(nl);
@@ -240,7 +258,7 @@ static int mnl_batch_talk(const struct mnl_socket *nf_sock,
        };
        int err = 0;
 
-       ret = mnl_nft_socket_sendmsg(nf_sock, batch);
+       ret = mnl_nft_socket_sendmsg(nf_sock, batch, numcmds);
        if (ret == -1)
                return -1;
 
@@ -795,6 +813,7 @@ static int nft_restart(struct nft_handle *h)
 
        h->portid = mnl_socket_get_portid(h->nl);
        nlbuffsiz = 0;
+       nlrcvbuffsiz = 0;
 
        return 0;
 }
@@ -2917,7 +2936,7 @@ retry:
        }
 
        errno = 0;
-       ret = mnl_batch_talk(h->nl, h->batch, &h->err_list);
+       ret = mnl_batch_talk(h->nl, h->batch, seq, &h->err_list);
        if (ret && errno == ERESTART) {
                nft_rebuild_cache(h);
 
-- 
2.21.0

Reply via email to