[osv-dev] [PATCH 10/10] netlink: enable it and add unit test

2022-06-03 Thread Waldemar Kozaczuk
This changes bsd/net.cc to enable netlink by registering the netlink
domain and calling netlink_init().

It also adds a unit test to verify the netlink implementation.

Signed-off-by: Waldemar Kozaczuk 
---
 bsd/net.cc |   5 +
 modules/tests/Makefile |   3 +-
 tests/tst-netlink.c| 441 +
 3 files changed, 448 insertions(+), 1 deletion(-)
 create mode 100644 tests/tst-netlink.c

diff --git a/bsd/net.cc b/bsd/net.cc
index 3e427575..f548e091 100644
--- a/bsd/net.cc
+++ b/bsd/net.cc
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* Generation of ip ids */
 void ip_initid(void);
@@ -32,6 +33,8 @@ extern "C" {
 extern  struct domain inetdomain;
 /* AF_ROUTE */
 extern  struct domain routedomain;
+/* AF_NETLINK */
+extern  struct domain netlinkdomain;
 }
 
 void net_init(void)
@@ -53,9 +56,11 @@ void net_init(void)
 domaininit(NULL);
 OSV_DOMAIN_SET(inet);
 OSV_DOMAIN_SET(route);
+OSV_DOMAIN_SET(netlink);
 rts_init();
 route_init();
 vnet_route_init();
+netlink_init();
 ipport_tick_init(NULL);
 arp_init();
 domainfinalize(NULL);
diff --git a/modules/tests/Makefile b/modules/tests/Makefile
index e462ebc8..f79da870 100644
--- a/modules/tests/Makefile
+++ b/modules/tests/Makefile
@@ -133,7 +133,8 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so 
tst-bsd-evh.so \
tst-getopt.so tst-getopt-pie.so tst-non-pie.so tst-semaphore.so \
tst-elf-init.so tst-realloc.so tst-setjmp.so \
libtls.so libtls_gold.so tst-tls.so tst-tls-gold.so tst-tls-pie.so \
-   tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so
+   tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so \
+   tst-netlink.so
 #  libstatic-thread-variable.so tst-static-thread-variable.so \
 
 #TODO For now let us disable these tests for aarch64 until
diff --git a/tests/tst-netlink.c b/tests/tst-netlink.c
new file mode 100644
index ..aebc9dd5
--- /dev/null
+++ b/tests/tst-netlink.c
@@ -0,0 +1,441 @@
+/* Unit test that verifies limited netlink support in OSv
+ *
+ * Copyright (C) 2022 Waldemar Kozaczuk
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+// This test should run on Linux:
+//   gcc tests/tst-netlink.c -o tst-netlink
+//   ./tst-netlink
+
+#include //printf, perror
+#include//memset, strlen
+#include//exit
+#include//close
+#include//msghdr
+#include //inet_ntop
+#include //sockaddr_nl
+#include   //rtgenmsg,ifinfomsg
+#include 
+#include 
+#include 
+
+#define BUFSIZE 8192
+
+void die(const char *s)
+{
+perror(s);
+exit(1);
+}
+
+int called_response_handler = 0;
+
+int test_netlink(struct nlmsghdr* req, pid_t pid, void 
(*handle_response)(struct nlmsghdr *))
+{
+struct sockaddr_nl src_addr, dst_addr, src_addr2;
+int s, len, end = 0;
+struct msghdr msg;
+struct iovec iov[1];
+char buf[BUFSIZE];
+
+//create a netlink socket
+if ((s=socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) < 0)
+{
+die("socket FAILED");
+}
+
+//bind socket
+memset(&src_addr, 0, sizeof(src_addr));
+src_addr.nl_family = AF_NETLINK;
+src_addr.nl_pid = pid; // if 0 kernel will assign unique id
+src_addr.nl_groups = 0;  /* not in mcast groups */
+if (bind(s, (struct sockaddr*) &src_addr, sizeof(src_addr)))
+{
+die("bind FAILED");
+}
+
+//get sock name to check pid
+memset(&src_addr2, 0, sizeof(src_addr2));
+socklen_t addr_len = sizeof(src_addr2);
+if (getsockname(s, (struct sockaddr*)&src_addr2, &addr_len)) {
+die("getsockname FAILED");
+}
+if (src_addr.nl_pid != 0) {
+assert(src_addr.nl_pid == src_addr2.nl_pid);
+}
+
+//build destination - kernel netlink address
+memset(&dst_addr, 0, sizeof(dst_addr));
+dst_addr.nl_family = AF_NETLINK;
+dst_addr.nl_pid = 0; // should be 0 if destination is kernel
+//dst_addr.nl_pid = 1; //TODO: check that non-0 errors with "sendmsg: 
Operation not permitted"
+dst_addr.nl_groups = 0;
+
+//build netlink message
+iov[0].iov_base = req;
+iov[0].iov_len = req->nlmsg_len;
+
+memset(&msg, 0, sizeof(msg));
+msg.msg_iov = iov;
+msg.msg_iovlen = 1;
+msg.msg_name = &dst_addr;
+msg.msg_namelen = sizeof(dst_addr);
+
+//send the message
+if (sendmsg(s, &msg, 0) < 0)
+{
+die("sendmsg FAILED");
+}
+
+called_response_handler = 0;
+//parse reply
+while (!end)
+{
+memset(&msg, 0, sizeof(msg)); //These and 2 lines below are needed to 
reset msg - otherwise weird page faults happen
+msg.msg_iov = iov;//Check if we can improve things 
downstream with some asserts or even error handling
+msg.msg_iovlen = 1;
+
+memset(buf, 0, BUFSIZE);

[osv-dev] [PATCH 09/10] netlink: set negative errno in error responses

2022-06-03 Thread Waldemar Kozaczuk
The netlink specification requires that the error field contains a negative
value of errno.

Signed-off-by: Waldemar Kozaczuk 
---
 bsd/sys/compat/linux/linux_netlink.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bsd/sys/compat/linux/linux_netlink.cc 
b/bsd/sys/compat/linux/linux_netlink.cc
index be9ea1b8..ec7e9341 100644
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -506,7 +506,7 @@ netlink_senderr(struct socket *so, struct nlmsghdr *nlm, 
int error)
return ENOBUFS;
}
err = (struct nlmsgerr *) nlmsg_data(hdr);
-   err->error = error;
+   err->error = -error; //Per netlink spec - "Negative errno or 0 for 
acknowledgements"
if (nlm) {
err->msg = *nlm;
} else {
-- 
2.35.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20220604012837.214986-9-jwkozaczuk%40gmail.com.


[osv-dev] [PATCH 08/10] netlink: made some functions static

2022-06-03 Thread Waldemar Kozaczuk
Signed-off-by: Waldemar Kozaczuk 
---
 bsd/sys/compat/linux/linux_netlink.cc | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/bsd/sys/compat/linux/linux_netlink.cc 
b/bsd/sys/compat/linux/linux_netlink.cc
index 180d81b5..be9ea1b8 100644
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -118,6 +118,7 @@ static int get_sockaddr_mask_prefix_len(struct bsd_sockaddr 
*sa)
 }
 
 
+static
 void *nl_m_put(struct mbuf *m0, int len)
 {
struct mbuf *m, *n;
@@ -151,6 +152,7 @@ void *nl_m_put(struct mbuf *m0, int len)
return data;
 }
 
+static
 struct nlmsghdr * nlmsg_put(struct mbuf *m, uint32_t pid, uint32_t seq, int 
type, int len, int flags)
 {
struct nlmsghdr *nlh;
@@ -170,16 +172,19 @@ struct nlmsghdr * nlmsg_put(struct mbuf *m, uint32_t pid, 
uint32_t seq, int type
return nlh;
 }
 
+static
 struct nlmsghdr * nlmsg_begin(struct mbuf *m, uint32_t pid, uint32_t seq, int 
type, int len, int flags)
 {
return nlmsg_put(m, pid, seq, type, len, flags);
 }
 
+static
 void nlmsg_end(struct mbuf *m, struct nlmsghdr *nlh)
 {
nlh->nlmsg_len = m->M_dat.MH.MH_pkthdr.len - ((uintptr_t)nlh - 
(uintptr_t)m->m_hdr.mh_data);
 }
 
+static
 int nla_put(struct mbuf *m, int attrtype, int len, const void *src)
 {
struct nlattr *nla;
@@ -198,16 +203,18 @@ int nla_put(struct mbuf *m, int attrtype, int len, const 
void *src)
 }
 
 template
-int nla_put_type(struct mbuf *m, int attrtype, T val)
+static int nla_put_type(struct mbuf *m, int attrtype, T val)
 {
return nla_put(m, attrtype, sizeof(val), &val);
 }
 
+static
 int nla_put_string(struct mbuf *m, int attrtype, const char *str)
 {
return nla_put(m, attrtype, strlen(str) + 1, str);
 }
 
+static
 int nla_put_sockaddr(struct mbuf *m, int attrtype, struct bsd_sockaddr *sa)
 {
void *data;
-- 
2.35.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20220604012837.214986-8-jwkozaczuk%40gmail.com.


[osv-dev] [PATCH 06/10] netlink: return stashed pid

2022-06-03 Thread Waldemar Kozaczuk
There are three types of pid used in netlink interface:
- the nl_pid on the source (app) side (part of sockaddr_nl) set before
  bind(); could be 0 to request kernel generating new one
- the nl_pid on the destination (kernel) side set into dst_addr that
  always needs to be 0 if we communicate with kernel
- the nlmsg_pid (sender port ID) that is part of the netlink message
  header sent to and received from kernel

Some relevant information from Linux docs:

"  nlmsg_seq and nlmsg_pid are used to track messages.  nlmsg_pid
   shows the origin of the message.  Note that there isn't a 1:1
   relationship between nlmsg_pid and the PID of the process if the
   message originated from a netlink socket.  See the ADDRESS
   FORMATS section for further information.

   Both nlmsg_seq and nlmsg_pid are opaque to netlink core."

and:

"  nl_pid is the unicast address of netlink socket.  It's always 0
   if the destination is in the kernel.  For a user-space process,
   nl_pid is usually the PID of the process owning the destination
   socket.  However, nl_pid identifies a netlink socket, not a
   process.  If a process owns several netlink sockets, then nl_pid
   can be equal to the process ID only for at most one socket.
   There are two ways to assign nl_pid to a netlink socket.  If the
   application sets nl_pid before calling bind(2), then it is up to
   the application to make sure that nl_pid is unique.  If the
   application sets it to 0, the kernel takes care of assigning it.
   The kernel assigns the process ID to the first netlink socket the
   process opens and assigns a unique nl_pid to every netlink socket
   that the process subsequently creates."

The 1st one needs to be stashed or generated (if 0) and then set on nlmsg_pid
for each response so that the application receiving it can distinguish it
if necessary. Golang runtime actually calls getsockname() and verifies that
the nlmsg_pid in the replies matches the nl_pid on the source socket.

The patch modifies relevant code that builds netlink responses
to take the nl_pid stashed during socket attach process and set it as
the value of nlmsg_pid. It also re-implements the netlink_sockaddr()
to make it return information including the source PID.

Signed-off-by: Waldemar Kozaczuk 
---
 bsd/sys/compat/linux/linux_netlink.cc | 41 +++
 bsd/sys/net/if_llatbl.cc  |  8 +++---
 bsd/sys/net/if_llatbl.h   |  4 +--
 3 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/bsd/sys/compat/linux/linux_netlink.cc 
b/bsd/sys/compat/linux/linux_netlink.cc
index fcdab06b..82205d2b 100644
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -436,10 +436,27 @@ netlink_shutdown(struct socket *so)
return (raw_usrreqs.pru_shutdown(so));
 }
 
+static pid_t
+get_socket_pid(struct socket *so)
+{
+   struct rawcb *rp = sotorawcb(so);
+   struct netlinkcb *ncb = (netlinkcb *)rp;
+   return ncb->nl_pid;
+}
+
 static int
 netlink_sockaddr(struct socket *so, struct bsd_sockaddr **nam)
 {
-   return (raw_usrreqs.pru_sockaddr(so, nam));
+   struct bsd_sockaddr_nl *sin;
+
+   sin = (bsd_sockaddr_nl*)malloc(sizeof *sin);
+   bzero(sin, sizeof *sin);
+   sin->nl_family = AF_NETLINK;
+   sin->nl_len = sizeof(*sin);
+   sin->nl_pid = get_socket_pid(so);
+
+   *nam = (bsd_sockaddr*)sin;
+   return 0;
 }
 
 static struct pr_usrreqs netlink_usrreqs = initialize_with([] (pr_usrreqs& x) {
@@ -474,7 +491,7 @@ netlink_senderr(struct socket *so, struct nlmsghdr *nlm, 
int error)
}
 
if ((hdr = (struct nlmsghdr *)nlmsg_put(m,
-   nlm ? nlm->nlmsg_pid : 0,
+   get_socket_pid(so),
nlm ? nlm->nlmsg_seq : 0,
NLMSG_ERROR, sizeof(*err),
nlm ? nlm->nlmsg_flags : 0)) == 
NULL) {
@@ -513,7 +530,7 @@ netlink_process_getlink_msg(struct socket *so, struct 
nlmsghdr *nlm)
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
IF_ADDR_RLOCK(ifp);
 
-   nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, 
LINUX_RTM_NEWLINK, sizeof(*ifm), nlm->nlmsg_flags);
+   nlh = nlmsg_begin(m, get_socket_pid(so), nlm->nlmsg_seq, 
LINUX_RTM_NEWLINK, sizeof(*ifm), nlm->nlmsg_flags);
if (!nlh) {
error = ENOBUFS;
goto done;
@@ -547,7 +564,7 @@ netlink_process_getlink_msg(struct socket *so, struct 
nlmsghdr *nlm)
IF_ADDR_RUNLOCK(ifp);
nlmsg_end(m, nlh);
}
-   nlh = nlmsg_put(m, nlm->nlmsg_pid, nlm->nlmsg_seq, NLMSG_DONE, 0, 
nlm->nlmsg_flags);
+   nlh = nlmsg_put(m, get_socket_pid(so), nlm->nlmsg_seq, NLMSG_DONE, 0, 
nlm->nlmsg_flag

[osv-dev] [PATCH 07/10] netlink: fix error handling

2022-06-03 Thread Waldemar Kozaczuk
Fix netlink_process_msg() to propagate potential error
from netlink_senderr(). Normally netlink_senderr() should return
0 indicating that the error response was built successfully.
This patch tweaks the logic to make sure the error response
in such case is sent back as a NLMSG_ERROR reply accordingly
instead of making sendmsg() return error.

Signed-off-by: Waldemar Kozaczuk 
---
 bsd/sys/compat/linux/linux_netlink.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bsd/sys/compat/linux/linux_netlink.cc 
b/bsd/sys/compat/linux/linux_netlink.cc
index 82205d2b..180d81b5 100644
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -830,7 +830,7 @@ netlink_process_getneigh_msg(struct socket *so, struct 
nlmsghdr *nlm)
struct netlink_getneigh_lle_cbdata cbdata;
int error;
 
-   if (nlm->nlmsg_len < sizeof (struct ndmsg)) {
+   if (nlm->nlmsg_len < NLMSG_LENGTH(sizeof (struct ndmsg))) {
return EINVAL;
}
 
@@ -892,7 +892,7 @@ netlink_process_msg(struct mbuf *m, struct socket *so)
 
 flush:
if (error) {
-   netlink_senderr(so, nlm, error);
+   error = netlink_senderr(so, nlm, error);
}
if (m) {
m_freem(m);
-- 
2.35.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20220604012837.214986-7-jwkozaczuk%40gmail.com.


[osv-dev] [PATCH 05/10] netlink: stash nl_pid into netlinkcb

2022-06-03 Thread Waldemar Kozaczuk
This enhances the netlink_attach() to capture or generate the source
nl_pid (if 0) and save it in the control block that could be fetched later when
necessary. This will be useful in the next patch.

Signed-off-by: Waldemar Kozaczuk 
---
 bsd/sys/compat/linux/linux_netlink.cc | 25 +
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/bsd/sys/compat/linux/linux_netlink.cc 
b/bsd/sys/compat/linux/linux_netlink.cc
index 7e743db8..fcdab06b 100644
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -61,6 +61,14 @@ struct bsd_sockaddr_nl {
uint32_tnl_groups;/* Multicast groups mask */
 };
 
+struct netlinkcb {
+   struct rawcbraw;
+   pid_t   nl_pid;
+};
+
+std::atomic _nl_next_gen_pid(2);
+
+
 MALLOC_DEFINE(M_NETLINK, "netlink", "netlink socket");
 
 static struct  bsd_sockaddr netlink_src = { 2, PF_NETLINK, };
@@ -311,16 +319,18 @@ netlink_close(struct socket *so)
 static int
 netlink_attach(struct socket *so, int proto, struct thread *td)
 {
+   struct netlinkcb *ncb;
struct rawcb *rp;
int s, error;
 
KASSERT(so->so_pcb == NULL, ("netlink_attach: so_pcb != NULL"));
 
/* XXX */
-   rp = (rawcb *)malloc(sizeof *rp);
-   if (rp == NULL)
+   ncb = (netlinkcb *)malloc(sizeof *ncb);
+   if (ncb == NULL)
return ENOBUFS;
-   bzero(rp, sizeof *rp);
+   bzero(ncb, sizeof *ncb);
+   rp = &ncb->raw;
 
/*
 * The splnet() is necessary to block protocols from sending
@@ -362,7 +372,14 @@ netlink_bind(struct socket *so, struct bsd_sockaddr *nam, 
struct thread *td)
__FILE__, __LINE__, __FUNCTION__, nam->sa_len, 
sizeof(struct bsd_sockaddr_nl));
return EINVAL;
}
-   // TODO: stash the nl_pid somewhere
+   auto *ncb = reinterpret_cast(rp);
+   bsd_sockaddr_nl *nl_sock_addr = (bsd_sockaddr_nl*)nam;
+   if (nl_sock_addr->nl_pid == 0) { // kernel needs to assign pid
+   auto assigned_pid = _nl_next_gen_pid.fetch_add(1, 
std::memory_order_relaxed);
+   ncb->nl_pid = assigned_pid;
+   } else {
+   ncb->nl_pid = nl_sock_addr->nl_pid;
+   }
return 0;
}
return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
-- 
2.35.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20220604012837.214986-5-jwkozaczuk%40gmail.com.


[osv-dev] [PATCH 04/10] netlink: do not put IFA_BROADCAST for loopback address

2022-06-03 Thread Waldemar Kozaczuk
This is a minor adjustment to make OSv implementation match what Linux
does - skip IFA_BROADCAST attributes for loopback address in NEWADDR
response.

Signed-off-by: Waldemar Kozaczuk 
---
 bsd/sys/compat/linux/linux_netlink.cc | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/bsd/sys/compat/linux/linux_netlink.cc 
b/bsd/sys/compat/linux/linux_netlink.cc
index 4208ce7f..7e743db8 100644
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -616,8 +616,11 @@ netlink_process_getaddr_msg(struct socket *so, struct 
nlmsghdr *nlm)
in6_clearscope(&broadaddr.sin6_addr);
p_broadaddr = (struct bsd_sockaddr 
*)&broadaddr;
}
-   if (nla_put_sockaddr(m, IFA_ADDRESS, p_addr) ||
-   nla_put_sockaddr(m, IFA_BROADCAST, 
p_broadaddr)){
+   if (nla_put_sockaddr(m, IFA_ADDRESS, p_addr)){
+   error = ENOBUFS;
+   goto done;
+   }
+   if (!(ifm->ifa_flags & IFF_LOOPBACK) && 
nla_put_sockaddr(m, IFA_BROADCAST, p_broadaddr)){
error = ENOBUFS;
goto done;
}
@@ -625,8 +628,11 @@ netlink_process_getaddr_msg(struct socket *so, struct 
nlmsghdr *nlm)
else
 #endif
{
-   if (nla_put_sockaddr(m, IFA_ADDRESS, 
ifa->ifa_addr) ||
-   nla_put_sockaddr(m, IFA_BROADCAST, 
ifa->ifa_broadaddr)){
+   if (nla_put_sockaddr(m, IFA_ADDRESS, 
ifa->ifa_addr)){
+   error = ENOBUFS;
+   goto done;
+   }
+   if (!(ifm->ifa_flags & IFF_LOOPBACK) && 
nla_put_sockaddr(m, IFA_BROADCAST, ifa->ifa_broadaddr)){
error = ENOBUFS;
goto done;
}
-- 
2.35.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20220604012837.214986-4-jwkozaczuk%40gmail.com.


[osv-dev] [PATCH 03/10] netlink: IFA_ADDRESS needs to go first

2022-06-03 Thread Waldemar Kozaczuk
Golang uses the netlink interface RTM_GETADDR to query the network interfaces 
and IPs.
It assumes that the 1st attribute in the RTM_NEWADDR response is IFA_ADDRESS. 
This
patch changes the order in which RTM_NEWADDR attributes are sent to make
sure the IFA_ADDRESS goes first and IFA_LABEL last.

This does not seem to be documented anywhere but Linux sends RTM_NEWADDR 
responses
with the IFA_ADDRESS attribute first so we follow suit.

Signed-off-by: Waldemar Kozaczuk 
---
 bsd/sys/compat/linux/linux_netlink.cc | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/bsd/sys/compat/linux/linux_netlink.cc 
b/bsd/sys/compat/linux/linux_netlink.cc
index ea0cf609..4208ce7f 100644
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -599,10 +599,6 @@ netlink_process_getaddr_msg(struct socket *so, struct 
nlmsghdr *nlm)
ifm->ifa_prefixlen = 
get_sockaddr_mask_prefix_len(ifa->ifa_netmask);
ifm->ifa_flags = ifp->if_flags | ifp->if_drv_flags;
ifm->ifa_scope = 0; // FIXME:
-   if (nla_put_string(m, IFA_LABEL, ifp->if_xname)) {
-   error = ENOBUFS;
-   goto done;
-   }
 #ifdef INET6
if (ifa->ifa_addr && ifa->ifa_addr->sa_family == 
AF_INET6){
// FreeBSD embeds the IPv6 scope ID in the IPv6 
address
@@ -635,6 +631,10 @@ netlink_process_getaddr_msg(struct socket *so, struct 
nlmsghdr *nlm)
goto done;
}
}
+   if (nla_put_string(m, IFA_LABEL, ifp->if_xname)) {
+   error = ENOBUFS;
+   goto done;
+   }
nlmsg_end(m, nlh);
}
 
-- 
2.35.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20220604012837.214986-3-jwkozaczuk%40gmail.com.


[osv-dev] [PATCH 02/10] netlink: set LINUX_RTM_NEWADDR and LINUX_RTM_NEWNEIGH on responses

2022-06-03 Thread Waldemar Kozaczuk
This patch fixes a minor bug in handling RTM_GETADDR and RTM_GETNEIGH
requests. It tweaks the relevant code to set the RTM_NEWADDR and RTM_NEWNEIGH
type for the responses respectively.

This is important as for example Golang runtime tests the nlmsg_type of
the netlink response and breaks if it is wrong. 

Signed-off-by: Waldemar Kozaczuk 
---
 bsd/sys/compat/linux/linux_netlink.cc | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/bsd/sys/compat/linux/linux_netlink.cc 
b/bsd/sys/compat/linux/linux_netlink.cc
index bc02bb7f..ea0cf609 100644
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -588,7 +588,7 @@ netlink_process_getaddr_msg(struct socket *so, struct 
nlmsghdr *nlm)
if (!ifa->ifa_addr)
continue;
 
-   nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, 
LINUX_RTM_GETADDR, sizeof(*ifm), nlm->nlmsg_flags);
+   nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, 
LINUX_RTM_NEWADDR, sizeof(*ifm), nlm->nlmsg_flags);
if (!nlh) {
error = ENOBUFS;
goto done;
@@ -720,7 +720,7 @@ netlink_getneigh_lle_cb(struct lltable *llt, struct llentry 
*lle, void *data)
struct nlmsghdr *nlm = cbdata->nlm;
struct mbuf *m = cbdata->m;
struct ndmsg *ndm;
-   struct nlmsghdr *nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, 
LINUX_RTM_GETNEIGH, sizeof(*ndm), nlm->nlmsg_flags);
+   struct nlmsghdr *nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, 
LINUX_RTM_NEWNEIGH, sizeof(*ndm), nlm->nlmsg_flags);
 
if (!nlh) {
return ENOBUFS;
@@ -753,7 +753,7 @@ netlink_getneigh_lle_cb(struct lltable *llt, struct llentry 
*lle, void *data)
}
}
 #endif
-   
+
if (nla_put(m, NDA_LLADDR, 6, lle->ll_addr.mac16)) {
return ENOBUFS;
}
@@ -875,29 +875,29 @@ extern struct domain netlinkdomain;   /* or 
at least forward */
 
 static struct protosw netlinksw[] = {
initialize_with([] (protosw& x) {
-   x.pr_type = SOCK_RAW;
+   x.pr_type = SOCK_RAW;
x.pr_domain =   &netlinkdomain;
x.pr_flags =PR_ATOMIC|PR_ADDR;
x.pr_output =   netlink_output;
x.pr_ctlinput = raw_ctlinput;
-   x.pr_init = raw_init;
+   x.pr_init = raw_init;
x.pr_usrreqs =  &netlink_usrreqs;
}),
initialize_with([] (protosw& x) {
-   x.pr_type = SOCK_DGRAM;
+   x.pr_type = SOCK_DGRAM;
x.pr_domain =   &netlinkdomain;
x.pr_flags =PR_ATOMIC|PR_ADDR;
x.pr_output =   netlink_output;
x.pr_ctlinput = raw_ctlinput;
-   x.pr_init = raw_init;
+   x.pr_init = raw_init;
x.pr_usrreqs =  &netlink_usrreqs;
}),
 };
 
 struct domain netlinkdomain = initialize_with([] (domain& x) {
-   x.dom_family =  PF_NETLINK;
-   x.dom_name ="netlink";
-   x.dom_protosw = netlinksw;
+   x.dom_family =  PF_NETLINK;
+   x.dom_name ="netlink";
+   x.dom_protosw = netlinksw;
x.dom_protoswNPROTOSW = 
&netlinksw[sizeof(netlinksw)/sizeof(netlinksw[0])];
 });
 
-- 
2.35.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20220604012837.214986-2-jwkozaczuk%40gmail.com.


[osv-dev] [PATCH 01/10] netlink: minimal Linux rtnetlink support

2022-06-03 Thread Waldemar Kozaczuk
This 1st of the 10 patches brings support of the minimal subset of
the rtnetlink (Linux routing socket) interface as described here -
https://man7.org/linux/man-pages/man7/rtnetlink.7.html.
The rtnetlink is actually a subset of even richer netlink interface
described here - https://man7.org/linux/man-pages/man7/netlink.7.html.
In other words, rtnetlink covers a NETLINK_ROUTE family of the broader
netlink interface.

We need rtnetlink in order to support the implementation of
if_nameindex() and getifaddrs() in modern musl 1.1.24. In addition
Golang uses the netlink interface to discover the interfaces and IP
addresses as well.

Please note this is an original copy of Charles Myers' two commits:
f1cd48e0f192564d64e7b1e1caccc8df05e7cd5d, except for the modifications to
bsd/net.cc that are part of the last commit, and a subset of
64a0c1affe9921e6a5a5b164edf1a544a7297393 that adds lltable_foreach()
and lltable_foreach_lle(). The next 8 much smaller patches fix various
small bugs and slightly enhance this implementation.
The last one enables the netlink support and adds a unit test.

The netlink interface is pretty rich and not very precisely documented.
I have actually used a unit test to discover in more detail what the
netlink responses should look like.

In general, the application would use standard socket API to open a
socket with the domain and protocol equal to AF_NETLINK and NETLINK_ROUTE
respectively and typically use SOCK_RAW as type. Then it would
optionally bind the socket and build a request sent using standard
sendmsg(). Finally it would receive all replies from kernel using
recvmsg().

To illustrate, the incomplete code might look like this:


//step 1
int s = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

//step 2
src_addr.nl_family = AF_NETLINK;
src_addr.nl_pid = pid; // if 0 kernel will assign unique id
bind(s, (struct sockaddr*) &src_addr, sizeof(src_addr))

// step 3
dst_addr.nl_family = AF_NETLINK;
dst_addr.nl_pid = 0; // should be 0 if destination is kernel

iov[0].iov_base = req;
iov[0].iov_len = req->nlmsg_len;

snd_msg.msg_iov = iov;
snd_msg.msg_iovlen = 1;
snd_msg.msg_name = &dst_addr;
snd_msg.msg_namelen = sizeof(dst_addr);

sendmsg(s, &snd_msg, 0)

//step 4
rcv_msg.msg_iov[0].iov_base = buf;
rcv_msg.msg_iov[0].iov_len = BUFSIZE;
recvmsg(s, &rcv_msg, 0)
//process replies received in buf


This patch implements support of only 3 rtnetlink types of requests:
- RTM_GETLINK
- RTM_GETADDR
- RTM_GETNEIGH

The bulk of the implementation is in the linux_netlink.cc and 
mostly centers around following functions:
- netlink_attach()
- netlink_bind()
- netlink_output()
- netlink_process_msg()
- netlink_process_getlink_msg()
- netlink_process_getaddr_msg()
- netlink_process_getneigh_msg()

Most other pru_* functions delegate to raw_usrreqs as is.

Authored-by: Charles Myers 
Signed-off-by: Waldemar Kozaczuk 
---
 Makefile  |   1 +
 bsd/sys/compat/linux/linux_netlink.cc | 904 ++
 bsd/sys/compat/linux/linux_netlink.h  | 175 +
 bsd/sys/compat/linux/linux_socket.cc  |   5 +
 bsd/sys/compat/linux/linux_socket.h   |   1 +
 bsd/sys/net/if_llatbl.cc  |  46 +-
 bsd/sys/net/if_llatbl.h   |  13 +
 bsd/sys/net/netisr.h  |   1 +
 8 files changed, 1143 insertions(+), 3 deletions(-)
 create mode 100644 bsd/sys/compat/linux/linux_netlink.cc
 create mode 100644 bsd/sys/compat/linux/linux_netlink.h

diff --git a/Makefile b/Makefile
index 19a4571b..2d1ba6a8 100644
--- a/Makefile
+++ b/Makefile
@@ -593,6 +593,7 @@ bsd += bsd/porting/bus_dma.o
 bsd += bsd/sys/netinet/if_ether.o
 bsd += bsd/sys/compat/linux/linux_socket.o
 bsd += bsd/sys/compat/linux/linux_ioctl.o
+bsd += bsd/sys/compat/linux/linux_netlink.o
 bsd += bsd/sys/net/if_ethersubr.o
 bsd += bsd/sys/net/if_llatbl.o
 bsd += bsd/sys/net/radix.o
diff --git a/bsd/sys/compat/linux/linux_netlink.cc 
b/bsd/sys/compat/linux/linux_netlink.cc
new file mode 100644
index ..bc02bb7f
--- /dev/null
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -0,0 +1,904 @@
+/*
+ * Linux NETLINK socket implementation.
+ *
+ * NETLINK is used to support IPv4/IPv6 LIBC getifaddrs(), if_nameindex().
+ *
+ * Warning: Tx/Rx messages are compatible with Linux not FreeBSD.
+ */
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#ifdef INET6
+#include 
+#include 
+#include 
+#include 
+#include 
+#endif
+
+#include 
+#include 
+#include 
+
+#if !defined(offsetof)
+#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
+#endif
+
+mutex netlink_mtx;
+
+#define NETLINK_LOCK()  mutex_lock(&netlink_mtx)
+#define NETLINK_UNLOCK() mutex_unlock(&netlink_mtx)
+#define NETLINK_LOCK_ASSERT()   assert