This patch revisits this glibc bug:

  https://sourceware.org/bugzilla/show_bug.cgi?id=12926

For some reason, this particular code path is very good at picking up
file descriptors which have been reused in correctly.  This happens if
other threads have a race, close the wrong file descriptor (the one used
in the glibc netlink code), and reopen another one in its place.

The netlink requests we send to the kernel are:

  struct req
  {
    struct nlmsghdr nlh;
    struct rtgenmsg g;
    /* struct rtgenmsg consists of a single byte.  This means there
       are three bytes of padding included in the REQ definition.
       We make them explicit here.  */
    char pad[3];
  } req;

  req.nlh.nlmsg_len = sizeof (req);
  req.nlh.nlmsg_type = RTM_GETADDR;
  req.nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
  req.nlh.nlmsg_pid = 0;
  req.nlh.nlmsg_seq = time (NULL);
  req.g.rtgen_family = AF_UNSPEC;

  req.nlh.nlmsg_len = sizeof (req);
  req.nlh.nlmsg_type = RTM_GETLINK;
  req.nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
  req.nlh.nlmsg_pid = 0;
  req.nlh.nlmsg_seq = time (NULL);
  req.g.rtgen_family = AF_UNSPEC;

I discussed this with Hannes and he thinks that a zero-length reply (as
received by recvmsg) is impossible at this point, for these specific
types of netlink requests.  The new assert triggers for zero-length
replies, but also for replies less than sizeof (struct nlmsghdr) bytes
long, and for unexpected errors (EBADF, ENOTSOCK, ENOTCONN,
ECONNREFUSED, and EAGAIN on a non-blocking sockets—ours are all blocking).

This is purely a defense against silent data corruption and bug reports
incorrectly blaming glibc (or the wrong part of glibc at least).  I
added it to all three copies of the netlink code in glibc.

The glibc netlink code is still broken: It does not time out and retry
(needed in case the request gets lots), does not handle NLM_F_DUMP_INTR,
and does not deal with NLMSG_ERROR and ENOBUFS.  But these are separate
issues.  SOCK_CLOEXEC is not used, either. If we fix those issues, the
assert would remain in place, except for the EAGAIN part.

(By the way, we'd also love to have a better kernel interface to fulfill
the needs for getaddrinfo address sorting.  The netlink requests we
currently use are much too slow if the host has many addresses configured.)

I have tested that basic getaddrinfo operations still work after the
patch, but glibc testsuite coverage in this area is very limited, and I
have yet to do full-system testing with this patch.

Florian
Terminate process on invalid netlink response from kernel [BZ #12926]

The recvmsg system calls for netlink sockets have been particularly
prone to picking up unrelated data after a file descriptor race
(where the descriptor is closed and reopened concurrently in a
multi-threaded process, as the result of a file descriptor
management issue elsewhere).  This commit adds additional error
checking and aborts the process if a datagram of unexpected length
(without the netlink header) is received, or an error code which
cannot happen due to the way the netlink socket is used.

2015-10-23  Florian Weimer  <fwei...@redhat.com>

	[BZ #12926]
	Terminate process on invalid netlink response.
	* sysdeps/unix/sysv/linux/netlinkaccess.h
	(__netlink_assert_response): Declare.
	* sysdeps/unix/sysv/linux/netlink_assert_response.c: New file.
	* sysdeps/unix/sysv/linux/Makefile [$(subdir) == inet]
	(sysdep_routines): Add netlink_assert_response.
	* sysdeps/unix/sysv/linux/check_native.c (__check_native): Call
	__netlink_assert_response.
	* sysdeps/unix/sysv/linux/check_pf.c (make_request): Likewise.
	* sysdeps/unix/sysv/linux/ifaddrs.c (__netlink_request): Likewise.
	* sysdeps/unix/sysv/linux/Versions (GLIBC_PRIVATE): Add
	__netlink_assert_response.

diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
index 2c67a66..d6cc529 100644
--- a/sysdeps/unix/sysv/linux/Makefile
+++ b/sysdeps/unix/sysv/linux/Makefile
@@ -151,6 +151,7 @@ sysdep_headers += netinet/if_fddi.h netinet/if_tr.h \
 		  netipx/ipx.h netash/ash.h netax25/ax25.h netatalk/at.h \
 		  netrom/netrom.h netpacket/packet.h netrose/rose.h \
 		  neteconet/ec.h netiucv/iucv.h
+sysdep_routines += netlink_assert_response
 endif
 
 # Don't compile the ctype glue code, since there is no old non-GNU C library.
diff --git a/sysdeps/unix/sysv/linux/Versions b/sysdeps/unix/sysv/linux/Versions
index 16bb281..202ffcc 100644
--- a/sysdeps/unix/sysv/linux/Versions
+++ b/sysdeps/unix/sysv/linux/Versions
@@ -169,5 +169,7 @@ libc {
   GLIBC_PRIVATE {
     # functions used in other libraries
     __syscall_rt_sigqueueinfo;
+    # functions used by nscd
+    __netlink_assert_response;
   }
 }
diff --git a/sysdeps/unix/sysv/linux/check_native.c b/sysdeps/unix/sysv/linux/check_native.c
index eaefca1..d04c8f2 100644
--- a/sysdeps/unix/sysv/linux/check_native.c
+++ b/sysdeps/unix/sysv/linux/check_native.c
@@ -35,6 +35,7 @@
 
 #include <not-cancel.h>
 
+#include "netlinkaccess.h"
 
 void
 __check_native (uint32_t a1_index, int *a1_native,
@@ -117,6 +118,7 @@ __check_native (uint32_t a1_index, int *a1_native,
 	};
 
       ssize_t read_len = TEMP_FAILURE_RETRY (__recvmsg (fd, &msg, 0));
+      __netlink_assert_response (fd, read_len);
       if (read_len < 0)
 	goto out_fail;
 
diff --git a/sysdeps/unix/sysv/linux/check_pf.c b/sysdeps/unix/sysv/linux/check_pf.c
index f072fb3..af4fdf8 100644
--- a/sysdeps/unix/sysv/linux/check_pf.c
+++ b/sysdeps/unix/sysv/linux/check_pf.c
@@ -36,6 +36,7 @@
 #include <atomic.h>
 #include <nscd/nscd-client.h>
 
+#include "netlinkaccess.h"
 
 #ifndef IFA_F_HOMEADDRESS
 # define IFA_F_HOMEADDRESS 0
@@ -164,7 +165,8 @@ make_request (int fd, pid_t pid)
 	};
 
       ssize_t read_len = TEMP_FAILURE_RETRY (__recvmsg (fd, &msg, 0));
-      if (read_len <= 0)
+      __netlink_assert_response (fd, read_len);
+      if (read_len < 0)
 	goto out_fail;
 
       if (msg.msg_flags & MSG_TRUNC)
diff --git a/sysdeps/unix/sysv/linux/ifaddrs.c b/sysdeps/unix/sysv/linux/ifaddrs.c
index 64b4a1c..768a7ed 100644
--- a/sysdeps/unix/sysv/linux/ifaddrs.c
+++ b/sysdeps/unix/sysv/linux/ifaddrs.c
@@ -168,6 +168,7 @@ __netlink_request (struct netlink_handle *h, int type)
 	};
 
       read_len = TEMP_FAILURE_RETRY (__recvmsg (h->fd, &msg, 0));
+      __netlink_assert_response (h->fd, read_len);
       if (read_len < 0)
 	goto out_fail;
 
diff --git a/sysdeps/unix/sysv/linux/netlink_assert_response.c b/sysdeps/unix/sysv/linux/netlink_assert_response.c
new file mode 100644
index 0000000..41ed86e
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/netlink_assert_response.c
@@ -0,0 +1,100 @@
+/* Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/socket.h>
+
+#include "netlinkaccess.h"
+
+static int
+get_address_family (int fd)
+{
+  struct sockaddr_storage sa;
+  socklen_t sa_len = sizeof (sa);
+  if (__getsockname (fd, (struct sockaddr *) &sa, &sa_len) < 0)
+    return -1;
+  return sa.ss_family;
+}
+
+void
+internal_function
+__netlink_assert_response (int fd, ssize_t result)
+{
+  if (result < 0)
+    {
+      /* Check if the error is unexpected.  */
+      bool terminate = false;
+      int error_code = errno;
+      int family = get_address_family (fd);
+      if (family != AF_NETLINK)
+        /* If the address family does not match (or getsockname
+           failed), report the original error.  */
+        terminate = true;
+      else if (error_code == EBADF
+          || error_code == ENOTCONN
+          || error_code == ENOTSOCK
+          || error_code == ECONNREFUSED)
+        /* These errors indicate that the descriptor is not a
+           connected socket.  */
+        terminate = true;
+      else if (error_code == EAGAIN || error_code == EWOULDBLOCK)
+        {
+          /* The kernel might return EAGAIN for other reasons than a
+             non-blocking socket.  But if the socket is not blocking,
+             it is not ours, so report the error.  */
+          int mode = __fcntl (fd, F_GETFL, 0);
+          if (mode < 0 || (mode & O_NONBLOCK) != 0)
+            terminate = true;
+        }
+      if (terminate)
+        {
+          char message[200];
+          if (family < 0)
+            __snprintf (message, sizeof (message),
+                        "Unexpected error %d on netlink descriptor %d",
+                        error_code, fd);
+          else
+            __snprintf (message, sizeof (message),
+                        "Unexpected error %d on netlink descriptor %d"
+                        " (address family %d)",
+                        error_code, fd, family);
+          __libc_fatal (message);
+        }
+      else
+        /* Restore orignal errno value.  */
+        __set_errno (error_code);
+    }
+  else if (result < sizeof (struct nlmsghdr))
+    {
+      char message[200];
+      int family = get_address_family (fd);
+      if (family < 0)
+          __snprintf (message, sizeof (message),
+                      "Unexpected netlink response of size %zd"
+                      " on descriptor %d",
+                      result, fd);
+      else
+          __snprintf (message, sizeof (message),
+                      "Unexpected Netlink response of size %zd"
+                      " on descriptor %d (address family %d)",
+                      result, fd, family);
+      __libc_fatal (message);
+    }
+}
+libc_hidden_def (__netlink_assert_response)
diff --git a/sysdeps/unix/sysv/linux/netlinkaccess.h b/sysdeps/unix/sysv/linux/netlinkaccess.h
index c204b67..01ac35c 100644
--- a/sysdeps/unix/sysv/linux/netlinkaccess.h
+++ b/sysdeps/unix/sysv/linux/netlinkaccess.h
@@ -19,6 +19,7 @@
 #define _NETLINKACCESS_H 1
 
 #include <stdint.h>
+#include <sys/types.h>
 #include <asm/types.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
@@ -48,5 +49,10 @@ extern void __netlink_close (struct netlink_handle *h);
 extern void __netlink_free_handle (struct netlink_handle *h);
 extern int __netlink_request (struct netlink_handle *h, int type);
 
+/* Terminate the process if RESULT is an invalid recvmsg result for
+   the netlink socket FD.  */
+void __netlink_assert_response (int fd, ssize_t result)
+  internal_function;
+libc_hidden_proto (__netlink_assert_response)
 
 #endif /* netlinkaccess.h */

Reply via email to