[osv-dev] [PATCH 10/10] netlink: enable it and add unit test
This changes bsd/net.cc to enable netlink by registering the netlink domain and calling netlink_init(). It also adds a unit test to verify the netlink implementation. Signed-off-by: Waldemar Kozaczuk --- bsd/net.cc | 5 + modules/tests/Makefile | 3 +- tests/tst-netlink.c| 441 + 3 files changed, 448 insertions(+), 1 deletion(-) create mode 100644 tests/tst-netlink.c diff --git a/bsd/net.cc b/bsd/net.cc index 3e427575..f548e091 100644 --- a/bsd/net.cc +++ b/bsd/net.cc @@ -23,6 +23,7 @@ #include #include #include +#include /* Generation of ip ids */ void ip_initid(void); @@ -32,6 +33,8 @@ extern "C" { extern struct domain inetdomain; /* AF_ROUTE */ extern struct domain routedomain; +/* AF_NETLINK */ +extern struct domain netlinkdomain; } void net_init(void) @@ -53,9 +56,11 @@ void net_init(void) domaininit(NULL); OSV_DOMAIN_SET(inet); OSV_DOMAIN_SET(route); +OSV_DOMAIN_SET(netlink); rts_init(); route_init(); vnet_route_init(); +netlink_init(); ipport_tick_init(NULL); arp_init(); domainfinalize(NULL); diff --git a/modules/tests/Makefile b/modules/tests/Makefile index e462ebc8..f79da870 100644 --- a/modules/tests/Makefile +++ b/modules/tests/Makefile @@ -133,7 +133,8 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so tst-bsd-evh.so \ tst-getopt.so tst-getopt-pie.so tst-non-pie.so tst-semaphore.so \ tst-elf-init.so tst-realloc.so tst-setjmp.so \ libtls.so libtls_gold.so tst-tls.so tst-tls-gold.so tst-tls-pie.so \ - tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so + tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so \ + tst-netlink.so # libstatic-thread-variable.so tst-static-thread-variable.so \ #TODO For now let us disable these tests for aarch64 until diff --git a/tests/tst-netlink.c b/tests/tst-netlink.c new file mode 100644 index ..aebc9dd5 --- /dev/null +++ b/tests/tst-netlink.c @@ -0,0 +1,441 @@ +/* Unit test that verifies limited netlink support in OSv + * + * Copyright (C) 2022 Waldemar Kozaczuk + * + * This work is open source software, 
licensed under the terms of the + * BSD license as described in the LICENSE file in the top-level directory. + */ + +// This test should run on Linux: +// gcc tests/tst-netlink.c -o tst-netlink +// ./tst-netlink + +#include //printf, perror +#include//memset, strlen +#include//exit +#include//close +#include//msghdr +#include //inet_ntop +#include //sockaddr_nl +#include //rtgenmsg,ifinfomsg +#include +#include +#include + +#define BUFSIZE 8192 + +void die(const char *s) +{ +perror(s); +exit(1); +} + +int called_response_handler = 0; + +int test_netlink(struct nlmsghdr* req, pid_t pid, void (*handle_response)(struct nlmsghdr *)) +{ +struct sockaddr_nl src_addr, dst_addr, src_addr2; +int s, len, end = 0; +struct msghdr msg; +struct iovec iov[1]; +char buf[BUFSIZE]; + +//create a netlink socket +if ((s=socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) < 0) +{ +die("socket FAILED"); +} + +//bind socket +memset(&src_addr, 0, sizeof(src_addr)); +src_addr.nl_family = AF_NETLINK; +src_addr.nl_pid = pid; // if 0 kernel will assign unique id +src_addr.nl_groups = 0; /* not in mcast groups */ +if (bind(s, (struct sockaddr*) &src_addr, sizeof(src_addr))) +{ +die("bind FAILED"); +} + +//get sock name to check pid +memset(&src_addr2, 0, sizeof(src_addr2)); +socklen_t addr_len = sizeof(src_addr2); +if (getsockname(s, (struct sockaddr*)&src_addr2, &addr_len)) { +die("getsockname FAILED"); +} +if (src_addr.nl_pid != 0) { +assert(src_addr.nl_pid == src_addr2.nl_pid); +} + +//build destination - kernel netlink address +memset(&dst_addr, 0, sizeof(dst_addr)); +dst_addr.nl_family = AF_NETLINK; +dst_addr.nl_pid = 0; // should be 0 if destination is kernel +//dst_addr.nl_pid = 1; //TODO: check that non-0 errors with "sendmsg: Operation not permitted" +dst_addr.nl_groups = 0; + +//build netlink message +iov[0].iov_base = req; +iov[0].iov_len = req->nlmsg_len; + +memset(&msg, 0, sizeof(msg)); +msg.msg_iov = iov; +msg.msg_iovlen = 1; +msg.msg_name = &dst_addr; +msg.msg_namelen = 
sizeof(dst_addr); + +//send the message +if (sendmsg(s, &msg, 0) < 0) +{ +die("sendmsg FAILED"); +} + +called_response_handler = 0; +//parse reply +while (!end) +{ +memset(&msg, 0, sizeof(msg)); //These and 2 lines below are needed to reset msg - otherwise weird page faults happen +msg.msg_iov = iov;//Check if we can improve things downstream with some asserts or even error handling +msg.msg_iovlen = 1; + +memset(buf, 0, BUFSIZE);
[osv-dev] [PATCH 09/10] netlink: set negative errno in error responses
The netlink specification requires that the error field contains a negative value of errno. Signed-off-by: Waldemar Kozaczuk --- bsd/sys/compat/linux/linux_netlink.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc index be9ea1b8..ec7e9341 100644 --- a/bsd/sys/compat/linux/linux_netlink.cc +++ b/bsd/sys/compat/linux/linux_netlink.cc @@ -506,7 +506,7 @@ netlink_senderr(struct socket *so, struct nlmsghdr *nlm, int error) return ENOBUFS; } err = (struct nlmsgerr *) nlmsg_data(hdr); - err->error = error; + err->error = -error; //Per netlink spec - "Negative errno or 0 for acknowledgements" if (nlm) { err->msg = *nlm; } else { -- 2.35.1 -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/20220604012837.214986-9-jwkozaczuk%40gmail.com.
[osv-dev] [PATCH 08/10] netlink: made some functions static
Signed-off-by: Waldemar Kozaczuk --- bsd/sys/compat/linux/linux_netlink.cc | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc index 180d81b5..be9ea1b8 100644 --- a/bsd/sys/compat/linux/linux_netlink.cc +++ b/bsd/sys/compat/linux/linux_netlink.cc @@ -118,6 +118,7 @@ static int get_sockaddr_mask_prefix_len(struct bsd_sockaddr *sa) } +static void *nl_m_put(struct mbuf *m0, int len) { struct mbuf *m, *n; @@ -151,6 +152,7 @@ void *nl_m_put(struct mbuf *m0, int len) return data; } +static struct nlmsghdr * nlmsg_put(struct mbuf *m, uint32_t pid, uint32_t seq, int type, int len, int flags) { struct nlmsghdr *nlh; @@ -170,16 +172,19 @@ struct nlmsghdr * nlmsg_put(struct mbuf *m, uint32_t pid, uint32_t seq, int type return nlh; } +static struct nlmsghdr * nlmsg_begin(struct mbuf *m, uint32_t pid, uint32_t seq, int type, int len, int flags) { return nlmsg_put(m, pid, seq, type, len, flags); } +static void nlmsg_end(struct mbuf *m, struct nlmsghdr *nlh) { nlh->nlmsg_len = m->M_dat.MH.MH_pkthdr.len - ((uintptr_t)nlh - (uintptr_t)m->m_hdr.mh_data); } +static int nla_put(struct mbuf *m, int attrtype, int len, const void *src) { struct nlattr *nla; @@ -198,16 +203,18 @@ int nla_put(struct mbuf *m, int attrtype, int len, const void *src) } template -int nla_put_type(struct mbuf *m, int attrtype, T val) +static int nla_put_type(struct mbuf *m, int attrtype, T val) { return nla_put(m, attrtype, sizeof(val), &val); } +static int nla_put_string(struct mbuf *m, int attrtype, const char *str) { return nla_put(m, attrtype, strlen(str) + 1, str); } +static int nla_put_sockaddr(struct mbuf *m, int attrtype, struct bsd_sockaddr *sa) { void *data; -- 2.35.1 -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+unsubscr...@googlegroups.com. 
To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/20220604012837.214986-8-jwkozaczuk%40gmail.com.
[osv-dev] [PATCH 06/10] netlink: return stashed pid
There are three types of pid used in netlink interface: - the nl_pid on the source (app) side (part of sockaddr_nl) set before bind(); could be 0 to request that the kernel generate a new one - the nl_pid on the destination (kernel) side set into dst_addr that always needs to be 0 if we communicate with kernel - the nlmsg_pid (sender port ID) that is part of the netlink message header sent to and received from kernel Some relevant information from Linux docs: " nlmsg_seq and nlmsg_pid are used to track messages. nlmsg_pid shows the origin of the message. Note that there isn't a 1:1 relationship between nlmsg_pid and the PID of the process if the message originated from a netlink socket. See the ADDRESS FORMATS section for further information. Both nlmsg_seq and nlmsg_pid are opaque to netlink core." and: " nl_pid is the unicast address of netlink socket. It's always 0 if the destination is in the kernel. For a user-space process, nl_pid is usually the PID of the process owning the destination socket. However, nl_pid identifies a netlink socket, not a process. If a process owns several netlink sockets, then nl_pid can be equal to the process ID only for at most one socket. There are two ways to assign nl_pid to a netlink socket. If the application sets nl_pid before calling bind(2), then it is up to the application to make sure that nl_pid is unique. If the application sets it to 0, the kernel takes care of assigning it. The kernel assigns the process ID to the first netlink socket the process opens and assigns a unique nl_pid to every netlink socket that the process subsequently creates." The 1st one needs to be stashed or generated (if 0) and then set on nlmsg_pid for each response so that the application receiving it can distinguish it if necessary. Golang runtime actually calls getsockname() and verifies that the nlmsg_pid in the replies matches the nl_pid on the source socket. 
The patch modifies the relevant code that builds netlink responses to set nlmsg_pid to the nl_pid stashed in the socket's control block. It also re-implements netlink_sockaddr() to make it return information including the source PID. Signed-off-by: Waldemar Kozaczuk --- bsd/sys/compat/linux/linux_netlink.cc | 41 +++ bsd/sys/net/if_llatbl.cc | 8 +++--- bsd/sys/net/if_llatbl.h | 4 +-- 3 files changed, 35 insertions(+), 18 deletions(-) diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc index fcdab06b..82205d2b 100644 --- a/bsd/sys/compat/linux/linux_netlink.cc +++ b/bsd/sys/compat/linux/linux_netlink.cc @@ -436,10 +436,27 @@ netlink_shutdown(struct socket *so) return (raw_usrreqs.pru_shutdown(so)); } +static pid_t +get_socket_pid(struct socket *so) +{ + struct rawcb *rp = sotorawcb(so); + struct netlinkcb *ncb = (netlinkcb *)rp; + return ncb->nl_pid; +} + static int netlink_sockaddr(struct socket *so, struct bsd_sockaddr **nam) { - return (raw_usrreqs.pru_sockaddr(so, nam)); + struct bsd_sockaddr_nl *sin; + + sin = (bsd_sockaddr_nl*)malloc(sizeof *sin); + bzero(sin, sizeof *sin); + sin->nl_family = AF_NETLINK; + sin->nl_len = sizeof(*sin); + sin->nl_pid = get_socket_pid(so); + + *nam = (bsd_sockaddr*)sin; + return 0; } static struct pr_usrreqs netlink_usrreqs = initialize_with([] (pr_usrreqs& x) { @@ -474,7 +491,7 @@ netlink_senderr(struct socket *so, struct nlmsghdr *nlm, int error) } if ((hdr = (struct nlmsghdr *)nlmsg_put(m, - nlm ? nlm->nlmsg_pid : 0, + get_socket_pid(so), nlm ? nlm->nlmsg_seq : 0, NLMSG_ERROR, sizeof(*err), nlm ? 
nlm->nlmsg_flags : 0)) == NULL) { @@ -513,7 +530,7 @@ netlink_process_getlink_msg(struct socket *so, struct nlmsghdr *nlm) TAILQ_FOREACH(ifp, &V_ifnet, if_link) { IF_ADDR_RLOCK(ifp); - nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWLINK, sizeof(*ifm), nlm->nlmsg_flags); + nlh = nlmsg_begin(m, get_socket_pid(so), nlm->nlmsg_seq, LINUX_RTM_NEWLINK, sizeof(*ifm), nlm->nlmsg_flags); if (!nlh) { error = ENOBUFS; goto done; @@ -547,7 +564,7 @@ netlink_process_getlink_msg(struct socket *so, struct nlmsghdr *nlm) IF_ADDR_RUNLOCK(ifp); nlmsg_end(m, nlh); } - nlh = nlmsg_put(m, nlm->nlmsg_pid, nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags); + nlh = nlmsg_put(m, get_socket_pid(so), nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flag
[osv-dev] [PATCH 07/10] netlink: fix error handling
Fix netlink_process_msg() to propagate potential error from netlink_senderr(). Normally netlink_senderr() should return 0 indicating that the error response was built successfully. This patch tweaks the logic to make sure the error response in such case is sent back as a NLMSG_ERROR reply accordingly instead of making sendmsg() return error. Signed-off-by: Waldemar Kozaczuk --- bsd/sys/compat/linux/linux_netlink.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc index 82205d2b..180d81b5 100644 --- a/bsd/sys/compat/linux/linux_netlink.cc +++ b/bsd/sys/compat/linux/linux_netlink.cc @@ -830,7 +830,7 @@ netlink_process_getneigh_msg(struct socket *so, struct nlmsghdr *nlm) struct netlink_getneigh_lle_cbdata cbdata; int error; - if (nlm->nlmsg_len < sizeof (struct ndmsg)) { + if (nlm->nlmsg_len < NLMSG_LENGTH(sizeof (struct ndmsg))) { return EINVAL; } @@ -892,7 +892,7 @@ netlink_process_msg(struct mbuf *m, struct socket *so) flush: if (error) { - netlink_senderr(so, nlm, error); + error = netlink_senderr(so, nlm, error); } if (m) { m_freem(m); -- 2.35.1 -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/20220604012837.214986-7-jwkozaczuk%40gmail.com.
[osv-dev] [PATCH 05/10] netlink: stash nl_pid into netlinkcb
This enhances netlink_attach() and netlink_bind() to capture the source nl_pid, or generate one if it is 0, and save it in the control block so that it can be fetched later when necessary. This will be useful in the next patch. Signed-off-by: Waldemar Kozaczuk --- bsd/sys/compat/linux/linux_netlink.cc | 25 + 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc index 7e743db8..fcdab06b 100644 --- a/bsd/sys/compat/linux/linux_netlink.cc +++ b/bsd/sys/compat/linux/linux_netlink.cc @@ -61,6 +61,14 @@ struct bsd_sockaddr_nl { uint32_tnl_groups;/* Multicast groups mask */ }; +struct netlinkcb { + struct rawcbraw; + pid_t nl_pid; +}; + +std::atomic _nl_next_gen_pid(2); + + MALLOC_DEFINE(M_NETLINK, "netlink", "netlink socket"); static struct bsd_sockaddr netlink_src = { 2, PF_NETLINK, }; @@ -311,16 +319,18 @@ netlink_close(struct socket *so) static int netlink_attach(struct socket *so, int proto, struct thread *td) { + struct netlinkcb *ncb; struct rawcb *rp; int s, error; KASSERT(so->so_pcb == NULL, ("netlink_attach: so_pcb != NULL")); /* XXX */ - rp = (rawcb *)malloc(sizeof *rp); - if (rp == NULL) + ncb = (netlinkcb *)malloc(sizeof *ncb); + if (ncb == NULL) return ENOBUFS; - bzero(rp, sizeof *rp); + bzero(ncb, sizeof *ncb); + rp = &ncb->raw; /* * The splnet() is necessary to block protocols from sending @@ -362,7 +372,14 @@ netlink_bind(struct socket *so, struct bsd_sockaddr *nam, struct thread *td) __FILE__, __LINE__, __FUNCTION__, nam->sa_len, sizeof(struct bsd_sockaddr_nl)); return EINVAL; } - // TODO: stash the nl_pid somewhere + auto *ncb = reinterpret_cast(rp); + bsd_sockaddr_nl *nl_sock_addr = (bsd_sockaddr_nl*)nam; + if (nl_sock_addr->nl_pid == 0) { // kernel needs to assign pid + auto assigned_pid = _nl_next_gen_pid.fetch_add(1, std::memory_order_relaxed); + ncb->nl_pid = assigned_pid; + } else { + ncb->nl_pid = nl_sock_addr->nl_pid; + } return 0; } return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just 
EINVAL */ -- 2.35.1 -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/20220604012837.214986-5-jwkozaczuk%40gmail.com.
[osv-dev] [PATCH 04/10] netlink: do not put IFA_BROADCAST for loopback address
This is a minor adjustment to make OSv implementation match what Linux does - skip IFA_BROADCAST attributes for loopback address in NEWADDR response. Signed-off-by: Waldemar Kozaczuk --- bsd/sys/compat/linux/linux_netlink.cc | 14 ++ 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc index 4208ce7f..7e743db8 100644 --- a/bsd/sys/compat/linux/linux_netlink.cc +++ b/bsd/sys/compat/linux/linux_netlink.cc @@ -616,8 +616,11 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm) in6_clearscope(&broadaddr.sin6_addr); p_broadaddr = (struct bsd_sockaddr *)&broadaddr; } - if (nla_put_sockaddr(m, IFA_ADDRESS, p_addr) || - nla_put_sockaddr(m, IFA_BROADCAST, p_broadaddr)){ + if (nla_put_sockaddr(m, IFA_ADDRESS, p_addr)){ + error = ENOBUFS; + goto done; + } + if (!(ifm->ifa_flags & IFF_LOOPBACK) && nla_put_sockaddr(m, IFA_BROADCAST, p_broadaddr)){ error = ENOBUFS; goto done; } @@ -625,8 +628,11 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm) else #endif { - if (nla_put_sockaddr(m, IFA_ADDRESS, ifa->ifa_addr) || - nla_put_sockaddr(m, IFA_BROADCAST, ifa->ifa_broadaddr)){ + if (nla_put_sockaddr(m, IFA_ADDRESS, ifa->ifa_addr)){ + error = ENOBUFS; + goto done; + } + if (!(ifm->ifa_flags & IFF_LOOPBACK) && nla_put_sockaddr(m, IFA_BROADCAST, ifa->ifa_broadaddr)){ error = ENOBUFS; goto done; } -- 2.35.1 -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/20220604012837.214986-4-jwkozaczuk%40gmail.com.
[osv-dev] [PATCH 03/10] netlink: IFA_ADDRESS needs to go first
Golang uses the netlink interface RTM_GETADDR to query the network interfaces and IPs. It assumes that the 1st attribute in the RTM_NEWADDR response is IFA_ADDRESS. This patch changes the order in which RTM_NEWADDR attributes are sent to make sure the IFA_ADDRESS goes first and IFA_LABEL last. This does not seem to be documented anywhere but Linux sends RTM_NEWADDR responses with the IFA_ADDRESS attribute first so we follow suit. Signed-off-by: Waldemar Kozaczuk --- bsd/sys/compat/linux/linux_netlink.cc | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc index ea0cf609..4208ce7f 100644 --- a/bsd/sys/compat/linux/linux_netlink.cc +++ b/bsd/sys/compat/linux/linux_netlink.cc @@ -599,10 +599,6 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm) ifm->ifa_prefixlen = get_sockaddr_mask_prefix_len(ifa->ifa_netmask); ifm->ifa_flags = ifp->if_flags | ifp->if_drv_flags; ifm->ifa_scope = 0; // FIXME: - if (nla_put_string(m, IFA_LABEL, ifp->if_xname)) { - error = ENOBUFS; - goto done; - } #ifdef INET6 if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6){ // FreeBSD embeds the IPv6 scope ID in the IPv6 address @@ -635,6 +631,10 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm) goto done; } } + if (nla_put_string(m, IFA_LABEL, ifp->if_xname)) { + error = ENOBUFS; + goto done; + } nlmsg_end(m, nlh); } -- 2.35.1 -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/20220604012837.214986-3-jwkozaczuk%40gmail.com.
[osv-dev] [PATCH 02/10] netlink: set LINUX_RTM_NEWADDR and LINUX_RTM_NEWNEIGH on responses
This patch fixes a minor bug in handling RTM_GETADDR and RTM_GETNEIGH requests. It tweaks the relevant code to set the RTM_NEWADDR and RTM_NEWNEIGH type for the responses respectively. This is important as for example Golang runtime tests the nlmsg_type of the netlink response and breaks if it is wrong. Signed-off-by: Waldemar Kozaczuk --- bsd/sys/compat/linux/linux_netlink.cc | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc index bc02bb7f..ea0cf609 100644 --- a/bsd/sys/compat/linux/linux_netlink.cc +++ b/bsd/sys/compat/linux/linux_netlink.cc @@ -588,7 +588,7 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm) if (!ifa->ifa_addr) continue; - nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_GETADDR, sizeof(*ifm), nlm->nlmsg_flags); + nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWADDR, sizeof(*ifm), nlm->nlmsg_flags); if (!nlh) { error = ENOBUFS; goto done; @@ -720,7 +720,7 @@ netlink_getneigh_lle_cb(struct lltable *llt, struct llentry *lle, void *data) struct nlmsghdr *nlm = cbdata->nlm; struct mbuf *m = cbdata->m; struct ndmsg *ndm; - struct nlmsghdr *nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_GETNEIGH, sizeof(*ndm), nlm->nlmsg_flags); + struct nlmsghdr *nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWNEIGH, sizeof(*ndm), nlm->nlmsg_flags); if (!nlh) { return ENOBUFS; @@ -753,7 +753,7 @@ netlink_getneigh_lle_cb(struct lltable *llt, struct llentry *lle, void *data) } } #endif - + if (nla_put(m, NDA_LLADDR, 6, lle->ll_addr.mac16)) { return ENOBUFS; } @@ -875,29 +875,29 @@ extern struct domain netlinkdomain; /* or at least forward */ static struct protosw netlinksw[] = { initialize_with([] (protosw& x) { - x.pr_type = SOCK_RAW; + x.pr_type = SOCK_RAW; x.pr_domain = &netlinkdomain; x.pr_flags =PR_ATOMIC|PR_ADDR; x.pr_output = netlink_output; x.pr_ctlinput = raw_ctlinput; - 
x.pr_init = raw_init; + x.pr_init = raw_init; x.pr_usrreqs = &netlink_usrreqs; }), initialize_with([] (protosw& x) { - x.pr_type = SOCK_DGRAM; + x.pr_type = SOCK_DGRAM; x.pr_domain = &netlinkdomain; x.pr_flags =PR_ATOMIC|PR_ADDR; x.pr_output = netlink_output; x.pr_ctlinput = raw_ctlinput; - x.pr_init = raw_init; + x.pr_init = raw_init; x.pr_usrreqs = &netlink_usrreqs; }), }; struct domain netlinkdomain = initialize_with([] (domain& x) { - x.dom_family = PF_NETLINK; - x.dom_name ="netlink"; - x.dom_protosw = netlinksw; + x.dom_family = PF_NETLINK; + x.dom_name ="netlink"; + x.dom_protosw = netlinksw; x.dom_protoswNPROTOSW = &netlinksw[sizeof(netlinksw)/sizeof(netlinksw[0])]; }); -- 2.35.1 -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/20220604012837.214986-2-jwkozaczuk%40gmail.com.
[osv-dev] [PATCH 01/10] netlink: minimal Linux rtnetlink support
This 1st of the 10 patches brings support of the minimal subset of the rtnetlink (Linux routing socket) interface as described here - https://man7.org/linux/man-pages/man7/rtnetlink.7.html. The rtnetlink is actually a subset of the even richer netlink interface described here - https://man7.org/linux/man-pages/man7/netlink.7.html. In other words, rtnetlink covers the NETLINK_ROUTE family of the broader netlink interface. We need rtnetlink in order to support the implementation of if_nameindex() and getifaddrs() in modern musl 1.1.24. In addition Golang uses the netlink interface to discover the interfaces and IP address as well. Please note this is an original copy of the Charles Myers' two commits: f1cd48e0f192564d64e7b1e1caccc8df05e7cd5d except for the modifications to bsd/net.cc that are part of the last commit and subset of the 64a0c1affe9921e6a5a5b164edf1a544a7297393 that adds lltable_foreach() and lltable_foreach_lle(). The next 8 much smaller patches fix various small bugs and slightly enhance this implementation. The last one enables the netlink support and adds a unit test. The netlink interface is pretty rich and not very precisely documented. I have actually used a unit test to discover in more detail what the netlink responses should look like. In general, the application would use the standard socket API to open a socket with the domain and protocol equal to AF_NETLINK and NETLINK_ROUTE respectively and typically use SOCK_RAW as type. Then it would optionally bind the socket and build a request sent using standard sendmsg(). Finally it would receive all replies from the kernel using recvmsg(). 
To illustrate, the incomplete code might look like this: //step 1 int s = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); //step 2 src_addr.nl_family = AF_NETLINK; src_addr.nl_pid = pid; // if 0 kernel will assign unique id bind(s, (struct sockaddr*) &src_addr, sizeof(src_addr)) // step 3 dst_addr.nl_family = AF_NETLINK; dst_addr.nl_pid = 0; // should be 0 if destination is kernel iov[0].iov_base = req; iov[0].iov_len = req->nlmsg_len; snd_msg.msg_iov = iov; snd_msg.msg_iovlen = 1; snd_msg.msg_name = &dst_addr; snd_msg.msg_namelen = sizeof(dst_addr); sendmsg(s, &snd_msg, 0) //step 4 rcv_msg.msg_iov[0].iov_base = buf; rcv_msg.msg_iov[0].iov_len = BUFSIZE; recvmsg(s, &rcv_msg, 0) //process replies received in buf This patch implements support of only 3 rtnetlink types of requests: - RTM_GETLINK - RTM_GETADDR - RTM_GETNEIGH The bulk of the implementation is in the linux_netlink.cc and mostly centers around following functions: - netlink_attach() - netlink_bind() - netlink_output() - netlink_process_msg() - netlink_process_getlink_msg() - netlink_process_getaddr_msg() - netlink_process_getneigh_msg() Most other pru_* functions delegate to raw_usrreqs as is. 
Authored-by: Charles Myers Signed-off-by: Waldemar Kozaczuk --- Makefile | 1 + bsd/sys/compat/linux/linux_netlink.cc | 904 ++ bsd/sys/compat/linux/linux_netlink.h | 175 + bsd/sys/compat/linux/linux_socket.cc | 5 + bsd/sys/compat/linux/linux_socket.h | 1 + bsd/sys/net/if_llatbl.cc | 46 +- bsd/sys/net/if_llatbl.h | 13 + bsd/sys/net/netisr.h | 1 + 8 files changed, 1143 insertions(+), 3 deletions(-) create mode 100644 bsd/sys/compat/linux/linux_netlink.cc create mode 100644 bsd/sys/compat/linux/linux_netlink.h diff --git a/Makefile b/Makefile index 19a4571b..2d1ba6a8 100644 --- a/Makefile +++ b/Makefile @@ -593,6 +593,7 @@ bsd += bsd/porting/bus_dma.o bsd += bsd/sys/netinet/if_ether.o bsd += bsd/sys/compat/linux/linux_socket.o bsd += bsd/sys/compat/linux/linux_ioctl.o +bsd += bsd/sys/compat/linux/linux_netlink.o bsd += bsd/sys/net/if_ethersubr.o bsd += bsd/sys/net/if_llatbl.o bsd += bsd/sys/net/radix.o diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc new file mode 100644 index ..bc02bb7f --- /dev/null +++ b/bsd/sys/compat/linux/linux_netlink.cc @@ -0,0 +1,904 @@ +/* + * Linux NETLINK socket implementation. + * + * NETLINK is used to support IPv4/IPv6 LIBC getifaddrs(), if_nameindex(). + * + * Warning: Tx/Rx messages are compatible with Linux not FreeBSD. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#ifdef INET6 +#include +#include +#include +#include +#include +#endif + +#include +#include +#include + +#if !defined(offsetof) +#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER) +#endif + +mutex netlink_mtx; + +#define NETLINK_LOCK() mutex_lock(&netlink_mtx) +#define NETLINK_UNLOCK() mutex_unlock(&netlink_mtx) +#define NETLINK_LOCK_ASSERT() assert