On Mon, Jan 10, 2022 at 11:47:57PM +0100, Tomas Hlavacek wrote: > Add netlink KRT dump filter on Linux to avoid PMTU cache records from FNHE > table dump along with KRT. > > Linux Kernel added FNHE table dump to the netlink API in patch > > https://patchwork.ozlabs.org/project/netdev/patch/8d3b68cd37fb5fddc470904cdd6793fcf480c6c1.1561131177.git.sbri...@redhat.com/ > > The filter mitigates the risk of receiving unknown and potentially large > number of FNHE records that would block BIRD I/O in each sync. There is a > known issue caused by the GRE tunnels on Linux that seems to be creating > one FNHE record for each destination IP address that is routed through the > tunnel, even when the PMTU equals to GRE interface MTU (tested with kernel > 5.5 - 5.16-rc7).
Thanks, merged with some modifications: https://gitlab.nic.cz/labs/bird/-/commit/e818f16448e918ed07633480291283f3449dd9e4 Instead of switching NETLINK_GET_STRICT_CHK on and off, I just used strict checking for all dumps (including link and address). Also, I removed the SO_SNDBUF/SO_RCVBUF change. That seems unrelated and has some issues: 1) Why these values? 32k for SO_SNDBUF is smaller than the default value (208k), so it in fact makes the buffer smaller (which probably does not matter). Meanwhile, 1M for SO_RCVBUF is bigger than the max value, so it is capped at 416k. 2) It applies just to nl_scan and nl_req, and not to the async fd, where it would make the most sense. 3) We may want a big rx buffer for the async fd; in this case we may consider using SO_RCVBUFFORCE. I am not sure which netlink socket operations are really synchronous or flow-controlled, so that a big buffer is not needed for them. > --- > sysdep/linux/netlink.c | 44 +++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 41 insertions(+), 3 deletions(-) > > diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c > index f85bcf35..79414122 100644 > --- a/sysdep/linux/netlink.c > +++ b/sysdep/linux/netlink.c > @@ -128,7 +128,7 @@ struct nl_sock > uint last_size; > }; > > -#define NL_RX_SIZE 8192 > +#define NL_RX_SIZE 32768 > > #define NL_OP_DELETE 0 > #define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL) > @@ -143,11 +143,18 @@ static struct nl_sock nl_req = {.fd = -1}; /* > Netlink socket for requests */ > static void > nl_open_sock(struct nl_sock *nl) > { > + int sndbuf = 32768; > + int rcvbuf = 1024*1024; > + > if (nl->fd < 0) > { > - nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE); > + nl->fd = socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE); > if (nl->fd < 0) > die("Unable to open rtnetlink socket: %m"); > + > + setsockopt(nl->fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, sizeof(sndbuf)); > + setsockopt(nl->fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)); > + > nl->seq = (u32) (current_time() TO_S); /* Or perhaps 
random_u32() ? */ > nl->rx_buffer = xmalloc(NL_RX_SIZE); > nl->last_hdr = NULL; > @@ -155,6 +162,12 @@ nl_open_sock(struct nl_sock *nl) > } > } > > +static void > +nl_set_strict_dump(struct nl_sock *nl, int strict) > +{ > + setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &strict, > sizeof(strict)); > +} > + > static void > nl_open(void) > { > @@ -192,6 +205,29 @@ nl_request_dump(int af, int cmd) > nl_send(&nl_scan, &req.nh); > } > > +static void > +nl_request_dump_rt(int af, int cmd) > +{ > + struct { > + struct nlmsghdr nh; > + struct rtmsg rtm; > + char buf[128]; > + } req = { > + .nh.nlmsg_type = cmd, > + .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)), > + .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP, > + .nh.nlmsg_seq = ++(nl_scan.seq), > + .nh.nlmsg_pid = 0, > + .rtm.rtm_protocol = RTPROT_UNSPEC, > + .rtm.rtm_family = af > + /* .rtm.rtm_flags is defaults to zero, hence RTM_F_CLONED is not set */ > + }; > + > + send(nl_scan.fd, &req, sizeof(req), 0); > + nl_scan.last_hdr = NULL; > +} > + > + > static struct nlmsghdr * > nl_get_reply(struct nl_sock *nl) > { > @@ -1864,13 +1900,15 @@ krt_do_scan(struct krt_proto *p UNUSED) /* > CONFIG_ALL_TABLES_AT_ONCE => p is NUL > struct nl_parse_state s; > > nl_parse_begin(&s, 1); > - nl_request_dump(AF_UNSPEC, RTM_GETROUTE); > + nl_set_strict_dump(&nl_scan, 1); > + nl_request_dump_rt(AF_UNSPEC, RTM_GETROUTE); > while (h = nl_get_scan()) > if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) > nl_parse_route(&s, h); > else > log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", > h->nlmsg_type); > nl_parse_end(&s); > + nl_set_strict_dump(&nl_scan, 0); > } > > /* > -- > 2.25.1 -- Elen sila lumenn' omentielvo Ondrej 'Santiago' Zajicek (email: santi...@crfreenet.org) OpenPGP encrypted e-mails preferred (KeyID 0x11DEADC3, wwwkeys.pgp.net) "To err is human -- to blame it on a computer is even more so."